|
@@ -1832,3898 +1832,3897 @@ int num_regs;
|
|
|
|
|
|
#endif
|
|
|
|
|
|
-static boolean group_in_compile_stack _RE_ARGS((compile_stack_type
|
|
|
- compile_stack,
|
|
|
+
|
|
|
|
|
|
- regnum_t regnum));
|
|
|
+/* Store OP at LOC followed by two-byte integer parameter ARG. */
|
|
|
|
|
|
-
|
|
|
- Returns one of error codes defined in `regex.h', or zero for success.
|
|
|
+static inline void store_op1(op, loc, arg)
|
|
|
+re_opcode_t op; /* opcode to emit */
|
|
|
+unsigned char *loc; /* destination; written at loc[0] and from loc + 1 onward */
|
|
|
+int arg; /* integer parameter that follows the opcode */
|
|
|
+{
|
|
|
+ *loc = (unsigned char) op; /* the opcode occupies the first byte */
|
|
|
+ STORE_NUMBER(loc + 1, arg); /* STORE_NUMBER encodes ARG starting at the next byte */
|
|
|
+}
|
|
|
|
|
|
- Assumes the `allocated' (and perhaps `buffer') and `translate'
|
|
|
- fields are set in BUFP on entry.
|
|
|
|
|
|
- If it succeeds, results are put in BUFP (if it returns an error, the
|
|
|
- contents of BUFP are undefined):
|
|
|
- `buffer' is the compiled pattern;
|
|
|
- `syntax' is set to SYNTAX;
|
|
|
- `used' is set to the length of the compiled pattern;
|
|
|
- `fastmap_accurate' is zero;
|
|
|
- `re_nsub' is the number of subexpressions in PATTERN;
|
|
|
- `not_bol' and `not_eol' are zero;
|
|
|
+/* Like store_op1, but for two two-byte parameters ARG1 and ARG2. */
|
|
|
|
|
|
- The `fastmap' and `newline_anchor' fields are neither
|
|
|
- examined nor set. */
|
|
|
+static void store_op2(op, loc, arg1, arg2)
|
|
|
+re_opcode_t op; /* opcode to emit */
|
|
|
+unsigned char *loc; /* destination; written at loc[0], from loc + 1 and from loc + 3 */
|
|
|
+int arg1, arg2; /* the two integer parameters, stored in this order */
|
|
|
+{
|
|
|
+ *loc = (unsigned char) op; /* the opcode occupies the first byte */
|
|
|
+ STORE_NUMBER(loc + 1, arg1); /* first parameter directly after the opcode */
|
|
|
+ STORE_NUMBER(loc + 3, arg2); /* second parameter two bytes further on */
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
-#define FREE_STACK_RETURN(value) \
|
|
|
- return (free (compile_stack.stack), value)
|
|
|
|
|
|
-static reg_errcode_t regex_compile(pattern, size, syntax, bufp)
|
|
|
-const char *pattern;
|
|
|
-size_t size;
|
|
|
-reg_syntax_t syntax;
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
+/* Copy the bytes from LOC to END to open up three bytes of space at LOC
|
|
|
+ for OP followed by two-byte integer parameter ARG. */
|
|
|
+
|
|
|
+static void insert_op1(op, loc, arg, end)
|
|
|
+re_opcode_t op;
|
|
|
+unsigned char *loc;
|
|
|
+int arg;
|
|
|
+unsigned char *end;
|
|
|
{
|
|
|
-
|
|
|
- `char *' (i.e., signed), we declare these variables as unsigned, so
|
|
|
- they can be reliably used as array indices. */
|
|
|
- register unsigned char c, c1;
|
|
|
+ register unsigned char *pfrom = end;
|
|
|
+ register unsigned char *pto = end + 3;
|
|
|
|
|
|
-
|
|
|
- const char *p1;
|
|
|
+ while (pfrom != loc)
|
|
|
+ *--pto = *--pfrom;
|
|
|
|
|
|
-
|
|
|
- register unsigned char *b;
|
|
|
+ store_op1(op, loc, arg);
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- compile_stack_type compile_stack;
|
|
|
|
|
|
-
|
|
|
- const char *p = pattern;
|
|
|
- const char *pend = pattern + size;
|
|
|
+
|
|
|
|
|
|
-
|
|
|
- RE_TRANSLATE_TYPE translate = bufp->translate;
|
|
|
+static void insert_op2(op, loc, arg1, arg2, end)
|
|
|
+re_opcode_t op;
|
|
|
+unsigned char *loc;
|
|
|
+int arg1, arg2;
|
|
|
+unsigned char *end;
|
|
|
+{
|
|
|
+ register unsigned char *pfrom = end;
|
|
|
+ register unsigned char *pto = end + 5;
|
|
|
|
|
|
-
|
|
|
- command. This makes it possible to tell if a new exact-match
|
|
|
- character can be added to that command or if the character requires
|
|
|
- a new `exactn' command. */
|
|
|
- unsigned char *pending_exact = 0;
|
|
|
+ while (pfrom != loc)
|
|
|
+ *--pto = *--pfrom;
|
|
|
|
|
|
-
|
|
|
- This tells, e.g., postfix * where to find the start of its
|
|
|
- operand. Reset at the beginning of groups and alternatives. */
|
|
|
- unsigned char *laststart = 0;
|
|
|
+ store_op2(op, loc, arg1, arg2);
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- unsigned char *begalt;
|
|
|
|
|
|
-
|
|
|
- which to go back if the interval is invalid. */
|
|
|
- const char *beg_interval;
|
|
|
+/* P points to just after a ^ in PATTERN. Return true if that ^ comes
|
|
|
+ after an alternative or a begin-subexpression. We assume there is at
|
|
|
+ least one character before the ^. */
|
|
|
|
|
|
-
|
|
|
- the containing expression. Each alternative of an `or' -- except the
|
|
|
- last -- ends with a forward jump of this sort. */
|
|
|
- unsigned char *fixup_alt_jump = 0;
|
|
|
+static boolean at_begline_loc_p(pattern, p, syntax)
|
|
|
+const char *pattern, *p;
|
|
|
+reg_syntax_t syntax;
|
|
|
+{
|
|
|
+ const char *prev = p - 2;
|
|
|
+ boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
|
|
|
|
|
|
-
|
|
|
- matching close-group on the compile stack, so the same register
|
|
|
- number is put in the stop_memory as the start_memory. */
|
|
|
- regnum_t regnum = 0;
|
|
|
+ return
|
|
|
+
|
|
|
+ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
|
|
|
+
|
|
|
+ || (*prev == '|'
|
|
|
+ && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
|
|
|
+}
|
|
|
|
|
|
-#ifdef DEBUG
|
|
|
- DEBUG_PRINT1("\nCompiling pattern: ");
|
|
|
- if (debug) {
|
|
|
- unsigned debug_count;
|
|
|
|
|
|
- for (debug_count = 0; debug_count < size; debug_count++)
|
|
|
- putchar(pattern[debug_count]);
|
|
|
- putchar('\n');
|
|
|
- }
|
|
|
-#endif
|
|
|
+/* The dual of at_begline_loc_p. This one is for $. We assume there is
|
|
|
+ at least one character after the $, i.e., `P < PEND'. */
|
|
|
|
|
|
-
|
|
|
- compile_stack.stack =
|
|
|
- TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
|
|
|
- if (compile_stack.stack == NULL)
|
|
|
- return REG_ESPACE;
|
|
|
+static boolean at_endline_loc_p(p, pend, syntax)
|
|
|
+const char *p, *pend;
|
|
|
+reg_syntax_t syntax;
|
|
|
+{
|
|
|
+ const char *next = p;
|
|
|
+ boolean next_backslash = *next == '\\';
|
|
|
+ const char *next_next = p + 1 < pend ? p + 1 : 0;
|
|
|
|
|
|
- compile_stack.size = INIT_COMPILE_STACK_SIZE;
|
|
|
- compile_stack.avail = 0;
|
|
|
+ return
|
|
|
+
|
|
|
+ (syntax & RE_NO_BK_PARENS ? *next == ')'
|
|
|
+ : next_backslash && next_next && *next_next == ')')
|
|
|
+
|
|
|
+ || (syntax & RE_NO_BK_VBAR ? *next == '|'
|
|
|
+ : next_backslash && next_next && *next_next == '|');
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- bufp->syntax = syntax;
|
|
|
- bufp->fastmap_accurate = 0;
|
|
|
- bufp->not_bol = bufp->not_eol = 0;
|
|
|
|
|
|
-
|
|
|
- printer (for debugging) will think there's no pattern. We reset it
|
|
|
- at the end. */
|
|
|
- bufp->used = 0;
|
|
|
+/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
|
|
|
+ false if it's not. */
|
|
|
|
|
|
-
|
|
|
- bufp->re_nsub = 0;
|
|
|
+static boolean group_in_compile_stack _RE_ARGS((compile_stack_type
|
|
|
+ compile_stack,
|
|
|
+ regnum_t regnum));
|
|
|
|
|
|
-#if !defined emacs && !defined SYNTAX_TABLE
|
|
|
-
|
|
|
- init_syntax_once();
|
|
|
-#endif
|
|
|
+static boolean group_in_compile_stack(compile_stack, regnum)
|
|
|
+compile_stack_type compile_stack;
|
|
|
+regnum_t regnum;
|
|
|
+{
|
|
|
+ int this_element;
|
|
|
|
|
|
- if (bufp->allocated == 0) {
|
|
|
- if (bufp->buffer) {
|
|
|
- enough space. This loses if buffer's address is bogus, but
|
|
|
- that is the user's responsibility. */
|
|
|
- RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
|
|
|
- } else {
|
|
|
- bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
|
|
|
- }
|
|
|
- if (!bufp->buffer)
|
|
|
- FREE_STACK_RETURN(REG_ESPACE);
|
|
|
+ for (this_element = compile_stack.avail - 1;
|
|
|
+ this_element >= 0; this_element--)
|
|
|
+ if (compile_stack.stack[this_element].regnum == regnum)
|
|
|
+ return true;
|
|
|
|
|
|
- bufp->allocated = INIT_BUF_SIZE;
|
|
|
- }
|
|
|
+ return false;
|
|
|
+}
|
|
|
|
|
|
- begalt = b = bufp->buffer;
|
|
|
|
|
|
-
|
|
|
- while (p != pend) {
|
|
|
- PATFETCH(c);
|
|
|
+/* Read the ending character of a range (in a bracket expression) from the
|
|
|
+ uncompiled pattern *P_PTR (which ends at PEND). We assume the
|
|
|
+ starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
|
|
|
+ Then we set the translation of all bits between the starting and
|
|
|
+ ending characters (inclusive) in the compiled pattern B.
|
|
|
|
|
|
- switch (c) {
|
|
|
- case '^':
|
|
|
- {
|
|
|
- if (
|
|
|
- p == pattern + 1
|
|
|
-
|
|
|
- || syntax & RE_CONTEXT_INDEP_ANCHORS
|
|
|
-
|
|
|
- || at_begline_loc_p(pattern, p, syntax))
|
|
|
- BUF_PUSH(begline);
|
|
|
- else
|
|
|
- goto normal_char;
|
|
|
- }
|
|
|
- break;
|
|
|
+ Return an error code.
|
|
|
|
|
|
+ We use these short variable names so we can use the same macros as
|
|
|
+ `regex_compile' itself. */
|
|
|
|
|
|
- case '$':
|
|
|
- {
|
|
|
- if (
|
|
|
- p == pend
|
|
|
-
|
|
|
- || syntax & RE_CONTEXT_INDEP_ANCHORS
|
|
|
-
|
|
|
- || at_endline_loc_p(p, pend, syntax))
|
|
|
- BUF_PUSH(endline);
|
|
|
- else
|
|
|
- goto normal_char;
|
|
|
- }
|
|
|
- break;
|
|
|
+static reg_errcode_t compile_range(p_ptr, pend, translate, syntax, b)
|
|
|
+const char **p_ptr, *pend;
|
|
|
+RE_TRANSLATE_TYPE translate;
|
|
|
+reg_syntax_t syntax;
|
|
|
+unsigned char *b;
|
|
|
+{
|
|
|
+ unsigned this_char;
|
|
|
|
|
|
+ const char *p = *p_ptr;
|
|
|
+ reg_errcode_t ret;
|
|
|
+ char range_start[2];
|
|
|
+ char range_end[2];
|
|
|
+ char ch[2];
|
|
|
|
|
|
- case '+':
|
|
|
- case '?':
|
|
|
- if ((syntax & RE_BK_PLUS_QM)
|
|
|
- || (syntax & RE_LIMITED_OPS))
|
|
|
- goto normal_char;
|
|
|
- handle_plus:
|
|
|
- case '*':
|
|
|
-
|
|
|
- if (!laststart) {
|
|
|
- if (syntax & RE_CONTEXT_INVALID_OPS)
|
|
|
- FREE_STACK_RETURN(REG_BADRPT);
|
|
|
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
|
|
|
- goto normal_char;
|
|
|
- }
|
|
|
+ if (p == pend)
|
|
|
+ return REG_ERANGE;
|
|
|
|
|
|
- {
|
|
|
-
|
|
|
- boolean keep_string_p = false;
|
|
|
+
|
|
|
+ appropriate translation is done in the bit-setting loop below. */
|
|
|
+ range_start[0] = p[-2];
|
|
|
+ range_start[1] = '\0';
|
|
|
+ range_end[0] = p[0];
|
|
|
+ range_end[1] = '\0';
|
|
|
|
|
|
-
|
|
|
- char zero_times_ok = 0, many_times_ok = 0;
|
|
|
+
|
|
|
+ caller isn't still at the ending character. */
|
|
|
+ (*p_ptr)++;
|
|
|
|
|
|
-
|
|
|
- down to just one (the right one). We can't combine
|
|
|
- interval operators with these because of, e.g., `a{2}*',
|
|
|
- which should only match an even number of `a's. */
|
|
|
+
|
|
|
+ ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
|
|
|
|
|
|
- for (;;) {
|
|
|
- zero_times_ok |= c != '+';
|
|
|
- many_times_ok |= c != '?';
|
|
|
+
|
|
|
+ char' -- we would otherwise go into an infinite loop, since all
|
|
|
+ characters <= 0xff. */
|
|
|
+ ch[1] = '\0';
|
|
|
+ for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) {
|
|
|
+ ch[0] = this_char;
|
|
|
+ if (strcoll(range_start, ch) <= 0 && strcoll(ch, range_end) <= 0) {
|
|
|
+ SET_LIST_BIT(TRANSLATE(this_char));
|
|
|
+ ret = REG_NOERROR;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- if (p == pend)
|
|
|
- break;
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
|
|
|
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
|
|
|
+ characters can start a string that matches the pattern. This fastmap
|
|
|
+ is used by re_search to skip quickly over impossible starting points.
|
|
|
|
|
|
- PATFETCH(c);
|
|
|
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
|
|
|
+ area as BUFP->fastmap.
|
|
|
|
|
|
- if (c == '*'
|
|
|
- || (!(syntax & RE_BK_PLUS_QM)
|
|
|
- && (c == '+' || c == '?')));
|
|
|
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
|
|
|
+ the pattern buffer.
|
|
|
|
|
|
- else if (syntax & RE_BK_PLUS_QM && c == '\\') {
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EESCAPE);
|
|
|
+ Returns 0 if we succeed, -2 if an internal error. */
|
|
|
|
|
|
- PATFETCH(c1);
|
|
|
- if (!(c1 == '+' || c1 == '?')) {
|
|
|
- PATUNFETCH;
|
|
|
- PATUNFETCH;
|
|
|
- break;
|
|
|
- }
|
|
|
+int re_compile_fastmap(bufp)
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+{
|
|
|
+ int j, k;
|
|
|
|
|
|
- c = c1;
|
|
|
- } else {
|
|
|
- PATUNFETCH;
|
|
|
- break;
|
|
|
- }
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+ fail_stack_type fail_stack;
|
|
|
+#endif
|
|
|
+#ifndef REGEX_MALLOC
|
|
|
+ char *destination;
|
|
|
+#endif
|
|
|
|
|
|
-
|
|
|
- }
|
|
|
+ register char *fastmap = bufp->fastmap;
|
|
|
+ unsigned char *pattern = bufp->buffer;
|
|
|
+ unsigned char *p = pattern;
|
|
|
+ register unsigned char *pend = pattern + bufp->used;
|
|
|
|
|
|
-
|
|
|
- to an empty pattern. */
|
|
|
- if (!laststart)
|
|
|
- break;
|
|
|
+#ifdef REL_ALLOC
|
|
|
+
|
|
|
+ it is allocated relocatably. */
|
|
|
+ fail_stack_elt_t *failure_stack_ptr;
|
|
|
+#endif
|
|
|
|
|
|
-
|
|
|
- and also whether or not two or more matches is allowed. */
|
|
|
- if (many_times_ok) {
|
|
|
- end a backward relative jump from `b' to before the next
|
|
|
- jump we're going to put in below (which jumps from
|
|
|
- laststart to after this jump).
|
|
|
+
|
|
|
+ proven otherwise. We set this false at the bottom of switch
|
|
|
+ statement, to which we get only if a particular path doesn't
|
|
|
+ match the empty string. */
|
|
|
+ boolean path_can_be_null = true;
|
|
|
|
|
|
- But if we are at the `*' in the exact sequence `.*\n',
|
|
|
- insert an unconditional jump backwards to the .,
|
|
|
- instead of the beginning of the loop. This way we only
|
|
|
- push a failure point once, instead of every time
|
|
|
- through the loop. */
|
|
|
- assert(p - 1 > pattern);
|
|
|
+
|
|
|
+ boolean succeed_n_p = false;
|
|
|
|
|
|
-
|
|
|
- GET_BUFFER_SPACE(3);
|
|
|
+ assert(fastmap != NULL && p != NULL);
|
|
|
|
|
|
-
|
|
|
- because laststart was nonzero. And we've already
|
|
|
- incremented `p', by the way, to be the character after
|
|
|
- the `*'. Do we have to do something analogous here
|
|
|
- for null bytes, because of RE_DOT_NOT_NULL? */
|
|
|
- if (TRANSLATE(*(p - 2)) == TRANSLATE('.')
|
|
|
- && zero_times_ok
|
|
|
- && p < pend && TRANSLATE(*p) == TRANSLATE('\n')
|
|
|
- && !(syntax & RE_DOT_NEWLINE)) {
|
|
|
- STORE_JUMP(jump, b, laststart);
|
|
|
- keep_string_p = true;
|
|
|
- } else
|
|
|
-
|
|
|
- STORE_JUMP(maybe_pop_jump, b, laststart - 3);
|
|
|
+ INIT_FAIL_STACK();
|
|
|
+ bzero(fastmap, 1 << BYTEWIDTH);
|
|
|
+ bufp->fastmap_accurate = 1;
|
|
|
+ bufp->can_be_null = 0;
|
|
|
|
|
|
-
|
|
|
- b += 3;
|
|
|
- }
|
|
|
+ while (1) {
|
|
|
+ if (p == pend || *p == succeed) {
|
|
|
+
|
|
|
+ if (!FAIL_STACK_EMPTY()) {
|
|
|
+ bufp->can_be_null |= path_can_be_null;
|
|
|
|
|
|
-
|
|
|
- end of the buffer after this jump is inserted. */
|
|
|
- GET_BUFFER_SPACE(3);
|
|
|
- INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump
|
|
|
- : on_failure_jump, laststart, b + 3);
|
|
|
- pending_exact = 0;
|
|
|
- b += 3;
|
|
|
+
|
|
|
+ path_can_be_null = true;
|
|
|
|
|
|
- if (!zero_times_ok) {
|
|
|
-
|
|
|
- `dummy_failure_jump' before the initial
|
|
|
- `on_failure_jump' instruction of the loop. This
|
|
|
- effects a skip over that instruction the first time
|
|
|
- we hit that loop. */
|
|
|
- GET_BUFFER_SPACE(3);
|
|
|
- INSERT_JUMP(dummy_failure_jump, laststart,
|
|
|
- laststart + 6);
|
|
|
- b += 3;
|
|
|
- }
|
|
|
- }
|
|
|
- break;
|
|
|
+ p = fail_stack.stack[--fail_stack.avail].pointer;
|
|
|
|
|
|
+ continue;
|
|
|
+ } else
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
- case '.':
|
|
|
- laststart = b;
|
|
|
- BUF_PUSH(anychar);
|
|
|
- break;
|
|
|
+
|
|
|
+ assert(p < pend);
|
|
|
|
|
|
+ switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
|
|
|
|
|
|
- case '[':
|
|
|
- {
|
|
|
- boolean had_char_class = false;
|
|
|
+
|
|
|
+ if a backreference is used, since it's too hard to figure out
|
|
|
+ the fastmap for the corresponding group. Setting
|
|
|
+ `can_be_null' stops `re_search_2' from using the fastmap, so
|
|
|
+ that is all we do. */
|
|
|
+ case duplicate:
|
|
|
+ bufp->can_be_null = 1;
|
|
|
+ goto done;
|
|
|
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
-
|
|
|
- opcode, the length count, and the bitset; 34 bytes in all. */
|
|
|
- GET_BUFFER_SPACE(34);
|
|
|
+
|
|
|
+ with `break'. */
|
|
|
|
|
|
- laststart = b;
|
|
|
+ case exactn:
|
|
|
+ fastmap[p[1]] = 1;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- statement, so we only need one BUF_PUSH. */
|
|
|
- BUF_PUSH(*p == '^' ? charset_not : charset);
|
|
|
- if (*p == '^')
|
|
|
- p++;
|
|
|
|
|
|
-
|
|
|
- p1 = p;
|
|
|
+ case charset:
|
|
|
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
|
|
|
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
|
|
|
+ fastmap[j] = 1;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
|
|
|
|
|
|
-
|
|
|
- bzero(b, (1 << BYTEWIDTH) / BYTEWIDTH);
|
|
|
+ case charset_not:
|
|
|
+
|
|
|
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
|
|
|
+ fastmap[j] = 1;
|
|
|
|
|
|
-
|
|
|
- if ((re_opcode_t) b[-2] == charset_not
|
|
|
- && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) SET_LIST_BIT('\n');
|
|
|
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
|
|
|
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
|
|
|
+ fastmap[j] = 1;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- for (;;) {
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
- PATFETCH(c);
|
|
|
+ case wordchar:
|
|
|
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
+ if (SYNTAX(j) == Sword)
|
|
|
+ fastmap[j] = 1;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') {
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EESCAPE);
|
|
|
|
|
|
- PATFETCH(c1);
|
|
|
- SET_LIST_BIT(c1);
|
|
|
- continue;
|
|
|
- }
|
|
|
+ case notwordchar:
|
|
|
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
+ if (SYNTAX(j) != Sword)
|
|
|
+ fastmap[j] = 1;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- not (i.e., when the bracket expression is `[]' so
|
|
|
- far), the ']' character bit gets set way below. */
|
|
|
- if (c == ']' && p != p1 + 1)
|
|
|
- break;
|
|
|
|
|
|
-
|
|
|
- was a character class. */
|
|
|
- if (had_char_class && c == '-' && *p != ']')
|
|
|
- FREE_STACK_RETURN(REG_ERANGE);
|
|
|
+ case anychar:
|
|
|
+ {
|
|
|
+ int fastmap_newline = fastmap['\n'];
|
|
|
|
|
|
-
|
|
|
- was a character: if this is a hyphen not at the
|
|
|
- beginning or the end of a list, then it's the range
|
|
|
- operator. */
|
|
|
- if (c == '-' && !(p - 2 >= pattern && p[-2] == '[')
|
|
|
- && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
|
|
|
- && *p != ']') {
|
|
|
- reg_errcode_t ret
|
|
|
- = compile_range(&p, pend, translate, syntax, b);
|
|
|
+
|
|
|
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
+ fastmap[j] = 1;
|
|
|
|
|
|
- if (ret != REG_NOERROR)
|
|
|
- FREE_STACK_RETURN(ret);
|
|
|
- }
|
|
|
+
|
|
|
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
|
|
|
+ fastmap['\n'] = fastmap_newline;
|
|
|
|
|
|
- else if (p[0] == '-' && p[1] != ']') {
|
|
|
- reg_errcode_t ret;
|
|
|
+
|
|
|
+ then the fastmap is irrelevant. Something's wrong here. */
|
|
|
+ else if (bufp->can_be_null)
|
|
|
+ goto done;
|
|
|
|
|
|
-
|
|
|
- PATFETCH(c1);
|
|
|
+
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
- ret = compile_range(&p, pend, translate, syntax, b);
|
|
|
- if (ret != REG_NOERROR)
|
|
|
- FREE_STACK_RETURN(ret);
|
|
|
- }
|
|
|
+#ifdef emacs
|
|
|
+ case syntaxspec:
|
|
|
+ k = *p++;
|
|
|
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
+ if (SYNTAX(j) == (enum syntaxcode) k)
|
|
|
+ fastmap[j] = 1;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- class. */
|
|
|
|
|
|
- else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') {
|
|
|
- char str[CHAR_CLASS_MAX_LENGTH + 1];
|
|
|
+ case notsyntaxspec:
|
|
|
+ k = *p++;
|
|
|
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
+ if (SYNTAX(j) != (enum syntaxcode) k)
|
|
|
+ fastmap[j] = 1;
|
|
|
+ break;
|
|
|
|
|
|
- PATFETCH(c);
|
|
|
- c1 = 0;
|
|
|
|
|
|
-
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EBRACK);
|
|
|
+
|
|
|
+ `continue'. */
|
|
|
|
|
|
- for (;;) {
|
|
|
- PATFETCH(c);
|
|
|
- if ((c == ':' && *p == ']') || p == pend)
|
|
|
- break;
|
|
|
- if (c1 < CHAR_CLASS_MAX_LENGTH)
|
|
|
- str[c1++] = c;
|
|
|
- else
|
|
|
-
|
|
|
- str[0] = '\0';
|
|
|
- }
|
|
|
- str[c1] = '\0';
|
|
|
|
|
|
-
|
|
|
- undo the ending character, the letters, and leave
|
|
|
- the leading `:' and `[' (but set bits for them). */
|
|
|
- if (c == ':' && *p == ']') {
|
|
|
-#if defined _LIBC || WIDE_CHAR_SUPPORT
|
|
|
- boolean is_lower = STREQ(str, "lower");
|
|
|
- boolean is_upper = STREQ(str, "upper");
|
|
|
- wctype_t wt;
|
|
|
- int ch;
|
|
|
+ case before_dot:
|
|
|
+ case at_dot:
|
|
|
+ case after_dot:
|
|
|
+ continue;
|
|
|
+#endif
|
|
|
|
|
|
- wt = IS_CHAR_CLASS(str);
|
|
|
- if (wt == 0)
|
|
|
- FREE_STACK_RETURN(REG_ECTYPE);
|
|
|
|
|
|
-
|
|
|
- class. */
|
|
|
- PATFETCH(c);
|
|
|
+ case no_op:
|
|
|
+ case begline:
|
|
|
+ case endline:
|
|
|
+ case begbuf:
|
|
|
+ case endbuf:
|
|
|
+ case wordbound:
|
|
|
+ case notwordbound:
|
|
|
+ case wordbeg:
|
|
|
+ case wordend:
|
|
|
+ case push_dummy_failure:
|
|
|
+ continue;
|
|
|
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
- for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) {
|
|
|
-# ifdef _LIBC
|
|
|
- if (__iswctype(__btowc(ch), wt))
|
|
|
- SET_LIST_BIT(ch);
|
|
|
-# else
|
|
|
- if (iswctype(btowc(ch), wt))
|
|
|
- SET_LIST_BIT(ch);
|
|
|
-# endif
|
|
|
+ case jump_n:
|
|
|
+ case pop_failure_jump:
|
|
|
+ case maybe_pop_jump:
|
|
|
+ case jump:
|
|
|
+ case jump_past_alt:
|
|
|
+ case dummy_failure_jump:
|
|
|
+ EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
+ p += j;
|
|
|
+ if (j > 0)
|
|
|
+ continue;
|
|
|
|
|
|
- if (translate && (is_upper || is_lower)
|
|
|
- && (ISUPPER(ch) || ISLOWER(ch)))
|
|
|
- SET_LIST_BIT(ch);
|
|
|
- }
|
|
|
+
|
|
|
+ loop and matched nothing. Opcode jumped to should be
|
|
|
+ `on_failure_jump' or `succeed_n'. Just treat it like an
|
|
|
+ ordinary jump. For a * loop, it has pushed its failure
|
|
|
+ point already; if so, discard that as redundant. */
|
|
|
+ if ((re_opcode_t) * p != on_failure_jump
|
|
|
+ && (re_opcode_t) * p != succeed_n)
|
|
|
+ continue;
|
|
|
|
|
|
- had_char_class = true;
|
|
|
-#else
|
|
|
- int ch;
|
|
|
- boolean is_alnum = STREQ(str, "alnum");
|
|
|
- boolean is_alpha = STREQ(str, "alpha");
|
|
|
- boolean is_blank = STREQ(str, "blank");
|
|
|
- boolean is_cntrl = STREQ(str, "cntrl");
|
|
|
- boolean is_digit = STREQ(str, "digit");
|
|
|
- boolean is_graph = STREQ(str, "graph");
|
|
|
- boolean is_lower = STREQ(str, "lower");
|
|
|
- boolean is_print = STREQ(str, "print");
|
|
|
- boolean is_punct = STREQ(str, "punct");
|
|
|
- boolean is_space = STREQ(str, "space");
|
|
|
- boolean is_upper = STREQ(str, "upper");
|
|
|
- boolean is_xdigit = STREQ(str, "xdigit");
|
|
|
+ p++;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
+ p += j;
|
|
|
|
|
|
- if (!IS_CHAR_CLASS(str))
|
|
|
- FREE_STACK_RETURN(REG_ECTYPE);
|
|
|
+
|
|
|
+ if (!FAIL_STACK_EMPTY()
|
|
|
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
|
|
|
+ fail_stack.avail--;
|
|
|
|
|
|
-
|
|
|
- class. */
|
|
|
- PATFETCH(c);
|
|
|
+ continue;
|
|
|
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
- for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
|
|
|
-
|
|
|
- avoid an arbitrary limit in some compiler. */
|
|
|
- if ((is_alnum && ISALNUM(ch))
|
|
|
- || (is_alpha && ISALPHA(ch))
|
|
|
- || (is_blank && ISBLANK(ch))
|
|
|
- || (is_cntrl && ISCNTRL(ch)))
|
|
|
- SET_LIST_BIT(ch);
|
|
|
- if ((is_digit && ISDIGIT(ch))
|
|
|
- || (is_graph && ISGRAPH(ch))
|
|
|
- || (is_lower && ISLOWER(ch))
|
|
|
- || (is_print && ISPRINT(ch)))
|
|
|
- SET_LIST_BIT(ch);
|
|
|
- if ((is_punct && ISPUNCT(ch))
|
|
|
- || (is_space && ISSPACE(ch))
|
|
|
- || (is_upper && ISUPPER(ch))
|
|
|
- || (is_xdigit && ISXDIGIT(ch)))
|
|
|
- SET_LIST_BIT(ch);
|
|
|
- if (translate && (is_upper || is_lower)
|
|
|
- && (ISUPPER(ch) || ISLOWER(ch)))
|
|
|
- SET_LIST_BIT(ch);
|
|
|
- }
|
|
|
- had_char_class = true;
|
|
|
-#endif
|
|
|
- } else {
|
|
|
- c1++;
|
|
|
- while (c1--)
|
|
|
- PATUNFETCH;
|
|
|
- SET_LIST_BIT('[');
|
|
|
- SET_LIST_BIT(':');
|
|
|
- had_char_class = false;
|
|
|
- }
|
|
|
- } else {
|
|
|
- had_char_class = false;
|
|
|
- SET_LIST_BIT(c);
|
|
|
+ case on_failure_jump:
|
|
|
+ case on_failure_keep_string_jump:
|
|
|
+ handle_on_failure_jump:
|
|
|
+ EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
+
|
|
|
+
|
|
|
+ end of the pattern. We don't want to push such a point,
|
|
|
+ since when we restore it above, entering the switch will
|
|
|
+ increment `p' past the end of the pattern. We don't need
|
|
|
+ to push such a point since we obviously won't find any more
|
|
|
+ fastmap entries beyond `pend'. Such a pattern can match
|
|
|
+ the null string, though. */
|
|
|
+ if (p + j < pend) {
|
|
|
+ if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
|
|
|
+ RESET_FAIL_STACK();
|
|
|
+ return -2;
|
|
|
}
|
|
|
+ } else
|
|
|
+ bufp->can_be_null = 1;
|
|
|
+
|
|
|
+ if (succeed_n_p) {
|
|
|
+ EXTRACT_NUMBER_AND_INCR(k, p);
|
|
|
+ succeed_n_p = false;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
- end of the map. Decrease the map-length byte too. */
|
|
|
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
|
|
|
- b[-1]--;
|
|
|
- b += b[-1];
|
|
|
- }
|
|
|
- break;
|
|
|
+ continue;
|
|
|
|
|
|
|
|
|
- case '(':
|
|
|
- if (syntax & RE_NO_BK_PARENS)
|
|
|
- goto handle_open;
|
|
|
- else
|
|
|
- goto normal_char;
|
|
|
+ case succeed_n:
|
|
|
+
|
|
|
+ p += 2;
|
|
|
|
|
|
+
|
|
|
+ EXTRACT_NUMBER_AND_INCR(k, p);
|
|
|
+ if (k == 0) {
|
|
|
+ p -= 4;
|
|
|
+ succeed_n_p = true;
|
|
|
+ goto handle_on_failure_jump;
|
|
|
+ }
|
|
|
+ continue;
|
|
|
|
|
|
- case ')':
|
|
|
- if (syntax & RE_NO_BK_PARENS)
|
|
|
- goto handle_close;
|
|
|
- else
|
|
|
- goto normal_char;
|
|
|
|
|
|
+ case set_number_at:
|
|
|
+ p += 4;
|
|
|
+ continue;
|
|
|
|
|
|
- case '\n':
|
|
|
- if (syntax & RE_NEWLINE_ALT)
|
|
|
- goto handle_alt;
|
|
|
- else
|
|
|
- goto normal_char;
|
|
|
|
|
|
+ case start_memory:
|
|
|
+ case stop_memory:
|
|
|
+ p += 2;
|
|
|
+ continue;
|
|
|
|
|
|
- case '|':
|
|
|
- if (syntax & RE_NO_BK_VBAR)
|
|
|
- goto handle_alt;
|
|
|
- else
|
|
|
- goto normal_char;
|
|
|
|
|
|
+ default:
|
|
|
+ abort();
|
|
|
+ }
|
|
|
|
|
|
- case '{':
|
|
|
- if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
|
|
|
- goto handle_interval;
|
|
|
- else
|
|
|
- goto normal_char;
|
|
|
+
|
|
|
+ characters for one path of the pattern -- and that the empty
|
|
|
+ string does not match. We need not follow this path further.
|
|
|
+ Instead, look at the next alternative (remembered on the
|
|
|
+ stack), or quit if no more. The test at the top of the loop
|
|
|
+ does these things. */
|
|
|
+ path_can_be_null = false;
|
|
|
+ p = pend;
|
|
|
+ }
|
|
|
|
|
|
+
|
|
|
+ pattern is empty). */
|
|
|
+ bufp->can_be_null |= path_can_be_null;
|
|
|
|
|
|
- case '\\':
|
|
|
- if (p == pend)
|
|
|
- FREE_STACK_RETURN(REG_EESCAPE);
|
|
|
+ done:
|
|
|
+ RESET_FAIL_STACK();
|
|
|
+ return 0;
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- distinguish, e.g., \B from \b, even if we normally would
|
|
|
- translate, e.g., B to b. */
|
|
|
- PATFETCH_RAW(c);
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__re_compile_fastmap, re_compile_fastmap)
|
|
|
+#endif
|
|
|
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
|
|
|
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
|
|
|
+ this memory for recording register information. STARTS and ENDS
|
|
|
+ must be allocated using the malloc library routine, and must each
|
|
|
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
|
|
|
|
|
|
- switch (c) {
|
|
|
- case '(':
|
|
|
- if (syntax & RE_NO_BK_PARENS)
|
|
|
- goto normal_backslash;
|
|
|
+ If NUM_REGS == 0, then subsequent matches should allocate their own
|
|
|
+ register data.
|
|
|
|
|
|
- handle_open:
|
|
|
- bufp->re_nsub++;
|
|
|
- regnum++;
|
|
|
+ Unless this function is called, the first search or match using
|
|
|
+ PATTERN_BUFFER will allocate its own register data, without
|
|
|
+ freeing the old data. */
|
|
|
+void re_set_registers(bufp, regs, num_regs, starts, ends)
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+struct re_registers *regs;
|
|
|
+unsigned num_regs;
|
|
|
+regoff_t *starts, *ends;
|
|
|
+{
|
|
|
+ if (num_regs) {
|
|
|
+ bufp->regs_allocated = REGS_REALLOCATE;
|
|
|
+ regs->num_regs = num_regs;
|
|
|
+ regs->start = starts;
|
|
|
+ regs->end = ends;
|
|
|
+ } else {
|
|
|
+ bufp->regs_allocated = REGS_UNALLOCATED;
|
|
|
+ regs->num_regs = 0;
|
|
|
+ regs->start = regs->end = (regoff_t *) 0;
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
- if (COMPILE_STACK_FULL) {
|
|
|
- RETALLOC(compile_stack.stack, compile_stack.size << 1,
|
|
|
- compile_stack_elt_t);
|
|
|
- if (compile_stack.stack == NULL)
|
|
|
- return REG_ESPACE;
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__re_set_registers, re_set_registers)
|
|
|
+#endif
|
|
|
+
|
|
|
+/* Like re_search_2, below, but only one string is specified, and
|
|
|
+ doesn't let you say where to stop matching. */
|
|
|
+int re_search(bufp, string, size, startpos, range, regs)
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+const char *string;
|
|
|
+int size, startpos, range;
|
|
|
+struct re_registers *regs;
|
|
|
+{
|
|
|
+ return re_search_2(bufp, NULL, 0, string, size, startpos, range,
|
|
|
+ regs, size);
|
|
|
+}
|
|
|
|
|
|
- compile_stack.size <<= 1;
|
|
|
- }
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__re_search, re_search)
|
|
|
+#endif
|
|
|
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
|
|
|
+ virtual concatenation of STRING1 and STRING2, starting first at index
|
|
|
+ STARTPOS, then at STARTPOS + 1, and so on.
|
|
|
|
|
|
-
|
|
|
- group. They are all relative offsets, so that if the
|
|
|
- whole pattern moves because of realloc, they will still
|
|
|
- be valid. */
|
|
|
- COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
|
|
|
- COMPILE_STACK_TOP.fixup_alt_jump
|
|
|
- =
|
|
|
- fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
|
|
|
- COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
|
|
|
- COMPILE_STACK_TOP.regnum = regnum;
|
|
|
+ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
|
|
|
|
|
|
-
|
|
|
- groups inner to this one. But do not push a
|
|
|
- start_memory for groups beyond the last one we can
|
|
|
- represent in the compiled pattern. */
|
|
|
- if (regnum <= MAX_REGNUM) {
|
|
|
- COMPILE_STACK_TOP.inner_group_offset =
|
|
|
- b - bufp->buffer + 2;
|
|
|
- BUF_PUSH_3(start_memory, regnum, 0);
|
|
|
- }
|
|
|
+ RANGE is how far to scan while trying to match. RANGE = 0 means try
|
|
|
+ only at STARTPOS; in general, the last start tried is STARTPOS +
|
|
|
+ RANGE.
|
|
|
|
|
|
- compile_stack.avail++;
|
|
|
+ In REGS, return the indices of the virtual concatenation of STRING1
|
|
|
+ and STRING2 that matched the entire BUFP->buffer and its contained
|
|
|
+ subexpressions.
|
|
|
|
|
|
- fixup_alt_jump = 0;
|
|
|
- laststart = 0;
|
|
|
- begalt = b;
|
|
|
-
|
|
|
- won't actually generate any code, so we'll have to
|
|
|
- clear pending_exact explicitly. */
|
|
|
- pending_exact = 0;
|
|
|
- break;
|
|
|
+ Do not consider matching one past the index STOP in the virtual
|
|
|
+ concatenation of STRING1 and STRING2.
|
|
|
|
|
|
+ We return either the position in the strings at which the match was
|
|
|
+ found, -1 if no match, or -2 if error (such as failure
|
|
|
+ stack overflow). */
|
|
|
+int
|
|
|
+re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs,
|
|
|
+ stop)
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+const char *string1, *string2;
|
|
|
+int size1, size2;
|
|
|
+int startpos;
|
|
|
+int range;
|
|
|
+struct re_registers *regs;
|
|
|
+int stop;
|
|
|
+{
|
|
|
+ int val;
|
|
|
+ register char *fastmap = bufp->fastmap;
|
|
|
+ register RE_TRANSLATE_TYPE translate = bufp->translate;
|
|
|
+ int total_size = size1 + size2;
|
|
|
+ int endpos = startpos + range;
|
|
|
|
|
|
- case ')':
|
|
|
- if (syntax & RE_NO_BK_PARENS)
|
|
|
- goto normal_backslash;
|
|
|
+
|
|
|
+ if (startpos < 0 || startpos > total_size)
|
|
|
+ return -1;
|
|
|
|
|
|
- if (COMPILE_STACK_EMPTY) {
|
|
|
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
|
|
|
- goto normal_backslash;
|
|
|
- else
|
|
|
- FREE_STACK_RETURN(REG_ERPAREN);
|
|
|
- }
|
|
|
+
|
|
|
+ the virtual concatenation of STRING1 and STRING2.
|
|
|
+ Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
|
|
|
+ if (endpos < 0)
|
|
|
+ range = 0 - startpos;
|
|
|
+ else if (endpos > total_size)
|
|
|
+ range = total_size - startpos;
|
|
|
|
|
|
- handle_close:
|
|
|
- if (fixup_alt_jump) {
|
|
|
- alternative for a possible future
|
|
|
- `pop_failure_jump' to pop. See comments at
|
|
|
- `push_dummy_failure' in `re_match_2'. */
|
|
|
- BUF_PUSH(push_dummy_failure);
|
|
|
+
|
|
|
+ search for a pattern that must be anchored. */
|
|
|
+ if (bufp->used > 0 && range > 0
|
|
|
+ && ((re_opcode_t) bufp->buffer[0] == begbuf
|
|
|
+
|
|
|
+ || ((re_opcode_t) bufp->buffer[0] == begline
|
|
|
+ && !bufp->newline_anchor))) {
|
|
|
+ if (startpos > 0)
|
|
|
+ return -1;
|
|
|
+ else
|
|
|
+ range = 1;
|
|
|
+ }
|
|
|
+#ifdef emacs
|
|
|
+
|
|
|
+ don't keep searching past point. */
|
|
|
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
|
|
|
+ && range > 0) {
|
|
|
+ range = PT - startpos;
|
|
|
+ if (range <= 0)
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+#endif
|
|
|
|
|
|
-
|
|
|
- to `fixup_alt_jump', in the `handle_alt' case below. */
|
|
|
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1);
|
|
|
- }
|
|
|
+
|
|
|
+ if (fastmap && !bufp->fastmap_accurate)
|
|
|
+ if (re_compile_fastmap(bufp) == -2)
|
|
|
+ return -2;
|
|
|
|
|
|
-
|
|
|
- if (COMPILE_STACK_EMPTY) {
|
|
|
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
|
|
|
- goto normal_char;
|
|
|
- else
|
|
|
- FREE_STACK_RETURN(REG_ERPAREN);
|
|
|
- }
|
|
|
+
|
|
|
+ for (;;) {
|
|
|
+
|
|
|
+ cannot be the start of a match. If the pattern can match the
|
|
|
+ null string, however, we don't need to skip characters; we want
|
|
|
+ the first null string. */
|
|
|
+ if (fastmap && startpos < total_size && !bufp->can_be_null) {
|
|
|
+ if (range > 0) {
|
|
|
+ register const char *d;
|
|
|
+ register int lim = 0;
|
|
|
+ int irange = range;
|
|
|
|
|
|
-
|
|
|
- ``can't happen''. */
|
|
|
- assert(compile_stack.avail != 0);
|
|
|
- {
|
|
|
-
|
|
|
- later groups should continue to be numbered higher,
|
|
|
- as in `(ab)c(de)' -- the second group is #2. */
|
|
|
- regnum_t this_group_regnum;
|
|
|
+ if (startpos < size1 && startpos + range >= size1)
|
|
|
+ lim = range - (size1 - startpos);
|
|
|
|
|
|
- compile_stack.avail--;
|
|
|
- begalt =
|
|
|
- bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
|
|
|
- fixup_alt_jump =
|
|
|
- COMPILE_STACK_TOP.fixup_alt_jump ? bufp->buffer +
|
|
|
- COMPILE_STACK_TOP.fixup_alt_jump - 1 : 0;
|
|
|
- laststart =
|
|
|
- bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
|
|
|
- this_group_regnum = COMPILE_STACK_TOP.regnum;
|
|
|
-
|
|
|
- won't actually generate any code, so we'll have to
|
|
|
- clear pending_exact explicitly. */
|
|
|
- pending_exact = 0;
|
|
|
-
|
|
|
-
|
|
|
- groups were inside this one. */
|
|
|
- if (this_group_regnum <= MAX_REGNUM) {
|
|
|
- unsigned char *inner_group_loc
|
|
|
-
|
|
|
- =
|
|
|
- bufp->buffer +
|
|
|
- COMPILE_STACK_TOP.inner_group_offset;
|
|
|
-
|
|
|
- *inner_group_loc = regnum - this_group_regnum;
|
|
|
- BUF_PUSH_3(stop_memory, this_group_regnum,
|
|
|
- regnum - this_group_regnum);
|
|
|
- }
|
|
|
- }
|
|
|
- break;
|
|
|
-
|
|
|
-
|
|
|
- case '|':
|
|
|
- if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
|
|
|
- goto normal_backslash;
|
|
|
- handle_alt:
|
|
|
- if (syntax & RE_LIMITED_OPS)
|
|
|
- goto normal_char;
|
|
|
-
|
|
|
-
|
|
|
- jumps to this alternative if the former fails. */
|
|
|
- GET_BUFFER_SPACE(3);
|
|
|
- INSERT_JUMP(on_failure_jump, begalt, b + 6);
|
|
|
- pending_exact = 0;
|
|
|
- b += 3;
|
|
|
-
|
|
|
-
|
|
|
- which gets executed if it gets matched. Adjust that
|
|
|
- jump so it will jump to this alternative's analogous
|
|
|
- jump (put in below, which in turn will jump to the next
|
|
|
- (if any) alternative's such jump, etc.). The last such
|
|
|
- jump jumps to the correct final destination. A picture:
|
|
|
- _____ _____
|
|
|
- | | | |
|
|
|
- | v | v
|
|
|
- a | b | c
|
|
|
-
|
|
|
- If we are at `b', then fixup_alt_jump right now points to a
|
|
|
- three-byte space after `a'. We'll put in the jump, set
|
|
|
- fixup_alt_jump to right after `b', and leave behind three
|
|
|
- bytes which we'll fill in when we get to after `c'. */
|
|
|
-
|
|
|
- if (fixup_alt_jump)
|
|
|
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
|
|
|
-
|
|
|
-
|
|
|
- to be filled in later either by next alternative or
|
|
|
- when know we're at the end of a series of alternatives. */
|
|
|
- fixup_alt_jump = b;
|
|
|
- GET_BUFFER_SPACE(3);
|
|
|
- b += 3;
|
|
|
-
|
|
|
- laststart = 0;
|
|
|
- begalt = b;
|
|
|
- break;
|
|
|
-
|
|
|
-
|
|
|
- case '{':
|
|
|
-
|
|
|
- if (!(syntax & RE_INTERVALS)
|
|
|
-
|
|
|
- operator. */
|
|
|
- || ((syntax & RE_INTERVALS)
|
|
|
- && (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern
|
|
|
- && p == pend))
|
|
|
- goto normal_backslash;
|
|
|
-
|
|
|
- handle_interval:
|
|
|
- {
|
|
|
-
|
|
|
+ d =
|
|
|
+ (startpos >=
|
|
|
+ size1 ? string2 - size1 : string1) + startpos;
|
|
|
|
|
|
-
|
|
|
- int lower_bound = -1, upper_bound = -1;
|
|
|
+
|
|
|
+ inside the loop. */
|
|
|
+ if (translate)
|
|
|
+ while (range > lim && !fastmap[(unsigned char)
|
|
|
+ translate[
|
|
|
+ (unsigned
|
|
|
+ char) *d++]])
|
|
|
+ range--;
|
|
|
+ else
|
|
|
+ while (range > lim && !fastmap[(unsigned char) *d++])
|
|
|
+ range--;
|
|
|
|
|
|
- beg_interval = p - 1;
|
|
|
+ startpos += irange - range;
|
|
|
+ } else {
|
|
|
|
|
|
- if (p == pend) {
|
|
|
- if (!(syntax & RE_INTERVALS)
|
|
|
- && (syntax & RE_NO_BK_BRACES)) goto
|
|
|
- unfetch_interval;
|
|
|
- else
|
|
|
- FREE_STACK_RETURN(REG_EBRACE);
|
|
|
- }
|
|
|
+ register char c = (size1 == 0 || startpos >= size1
|
|
|
+ ? string2[startpos - size1]
|
|
|
+ : string1[startpos]);
|
|
|
|
|
|
- GET_UNSIGNED_NUMBER(lower_bound);
|
|
|
+ if (!fastmap[(unsigned char) TRANSLATE(c)])
|
|
|
+ goto advance;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- if (c == ',') {
|
|
|
- GET_UNSIGNED_NUMBER(upper_bound);
|
|
|
- if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
|
|
|
- || ((syntax & RE_NO_BK_BRACES) && c != '}'))
|
|
|
- FREE_STACK_RETURN(REG_BADBR);
|
|
|
+
|
|
|
+ if (range >= 0 && startpos == total_size && fastmap
|
|
|
+ && !bufp->can_be_null) return -1;
|
|
|
|
|
|
- if (upper_bound < 0)
|
|
|
- upper_bound = RE_DUP_MAX;
|
|
|
- } else
|
|
|
-
|
|
|
- upper_bound = lower_bound;
|
|
|
+ val = re_match_2_internal(bufp, string1, size1, string2, size2,
|
|
|
+ startpos, regs, stop);
|
|
|
+#ifndef REGEX_MALLOC
|
|
|
+# ifdef C_ALLOCA
|
|
|
+ alloca(0);
|
|
|
+# endif
|
|
|
+#endif
|
|
|
|
|
|
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
|
|
|
- || lower_bound > upper_bound) {
|
|
|
- if (!(syntax & RE_INTERVALS)
|
|
|
- && (syntax & RE_NO_BK_BRACES)) goto
|
|
|
- unfetch_interval;
|
|
|
- else
|
|
|
- FREE_STACK_RETURN(REG_BADBR);
|
|
|
- }
|
|
|
+ if (val >= 0)
|
|
|
+ return startpos;
|
|
|
|
|
|
- if (!(syntax & RE_NO_BK_BRACES)) {
|
|
|
- if (c != '\\')
|
|
|
- FREE_STACK_RETURN(REG_EBRACE);
|
|
|
+ if (val == -2)
|
|
|
+ return -2;
|
|
|
|
|
|
- PATFETCH(c);
|
|
|
- }
|
|
|
+ advance:
|
|
|
+ if (!range)
|
|
|
+ break;
|
|
|
+ else if (range > 0) {
|
|
|
+ range--;
|
|
|
+ startpos++;
|
|
|
+ } else {
|
|
|
+ range++;
|
|
|
+ startpos--;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return -1;
|
|
|
+}
|
|
|
|
|
|
- if (c != '}') {
|
|
|
- if (!(syntax & RE_INTERVALS)
|
|
|
- && (syntax & RE_NO_BK_BRACES)) goto
|
|
|
- unfetch_interval;
|
|
|
- else
|
|
|
- FREE_STACK_RETURN(REG_BADBR);
|
|
|
- }
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__re_search_2, re_search_2)
|
|
|
+#endif
|
|
|
+
|
|
|
+ and `string2' into an offset from the beginning of that string. */
|
|
|
+#define POINTER_TO_OFFSET(ptr) \
|
|
|
+ (FIRST_STRING_P (ptr) \
|
|
|
+ ? ((regoff_t) ((ptr) - string1)) \
|
|
|
+ : ((regoff_t) ((ptr) - string2 + size1)))
|
|
|
+
|
|
|
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
|
|
|
+
|
|
|
+ string2 if necessary. */
|
|
|
+#define PREFETCH() \
|
|
|
+ while (d == dend) \
|
|
|
+ { \
|
|
|
+ \
|
|
|
+ if (dend == end_match_2) \
|
|
|
+ goto fail; \
|
|
|
+ \
|
|
|
+ d = string2; \
|
|
|
+ dend = end_match_2; \
|
|
|
+ }
|
|
|
+
|
|
|
+ of `string1' and `string2'. If only one string, it's `string2'. */
|
|
|
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
|
|
|
+#define AT_STRINGS_END(d) ((d) == end2)
|
|
|
+
|
|
|
+ two special cases to check for: if past the end of string1, look at
|
|
|
+ the first character in string2; and if before the beginning of
|
|
|
+ string2, look at the last character in string1. */
|
|
|
+#define WORDCHAR_P(d) \
|
|
|
+ (SYNTAX ((d) == end1 ? *string2 \
|
|
|
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
|
|
|
+ == Sword)
|
|
|
+
|
|
|
+#if 0
|
|
|
+
|
|
|
+ to being word-constituent. */
|
|
|
+#define AT_WORD_BOUNDARY(d) \
|
|
|
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
|
|
|
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
|
|
|
+# define FREE_VARIABLES() \
|
|
|
+ do { \
|
|
|
+ REGEX_FREE_STACK (fail_stack.stack); \
|
|
|
+ FREE_VAR (regstart); \
|
|
|
+ FREE_VAR (regend); \
|
|
|
+ FREE_VAR (old_regstart); \
|
|
|
+ FREE_VAR (old_regend); \
|
|
|
+ FREE_VAR (best_regstart); \
|
|
|
+ FREE_VAR (best_regend); \
|
|
|
+ FREE_VAR (reg_info); \
|
|
|
+ FREE_VAR (reg_dummy); \
|
|
|
+ FREE_VAR (reg_info_dummy); \
|
|
|
+ } while (0)
|
|
|
+#else
|
|
|
+# define FREE_VARIABLES() ((void)0)
|
|
|
+#endif
|
|
|
+
|
|
|
+ register values; since we have a limit of 255 registers (because
|
|
|
+ we use only one byte in the pattern for the register number), we can
|
|
|
+ use numbers larger than 255. They must differ by 1, because of
|
|
|
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
|
|
|
+ be larger than the value for the highest register, so we do not try
|
|
|
+ to actually save any registers when none are active. */
|
|
|
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
|
|
|
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
|
|
|
+
|
|
|
+#ifndef emacs
|
|
|
+
|
|
|
+int re_match(bufp, string, size, pos, regs)
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+const char *string;
|
|
|
+int size, pos;
|
|
|
+struct re_registers *regs;
|
|
|
+{
|
|
|
+ int result = re_match_2_internal(bufp, NULL, 0, string, size,
|
|
|
+ pos, regs, size);
|
|
|
|
|
|
-
|
|
|
+# ifndef REGEX_MALLOC
|
|
|
+# ifdef C_ALLOCA
|
|
|
+ alloca(0);
|
|
|
+# endif
|
|
|
+# endif
|
|
|
+ return result;
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- if (!laststart) {
|
|
|
- if (syntax & RE_CONTEXT_INVALID_OPS)
|
|
|
- FREE_STACK_RETURN(REG_BADRPT);
|
|
|
- else if (syntax & RE_CONTEXT_INDEP_OPS)
|
|
|
- laststart = b;
|
|
|
- else
|
|
|
- goto unfetch_interval;
|
|
|
- }
|
|
|
+# ifdef _LIBC
|
|
|
+weak_alias(__re_match, re_match)
|
|
|
+# endif
|
|
|
+#endif
|
|
|
+static boolean group_match_null_string_p _RE_ARGS((unsigned char **p,
|
|
|
+ unsigned char *end,
|
|
|
+ register_info_type *
|
|
|
|
|
|
-
|
|
|
- all; jump from `laststart' to `b + 3', which will be
|
|
|
- the end of the buffer after we insert the jump. */
|
|
|
- if (upper_bound == 0) {
|
|
|
- GET_BUFFER_SPACE(3);
|
|
|
- INSERT_JUMP(jump, laststart, b + 3);
|
|
|
- b += 3;
|
|
|
- }
|
|
|
+ reg_info));
|
|
|
+static boolean alt_match_null_string_p
|
|
|
+_RE_ARGS(
|
|
|
|
|
|
-
|
|
|
- we're all done, the pattern will look like:
|
|
|
- set_number_at <jump count> <upper bound>
|
|
|
- set_number_at <succeed_n count> <lower bound>
|
|
|
- succeed_n <after jump addr> <succeed_n count>
|
|
|
- <body of loop>
|
|
|
- jump_n <succeed_n addr> <jump count>
|
|
|
- (The upper bound and `jump_n' are omitted if
|
|
|
- `upper_bound' is 1, though.) */
|
|
|
- else {
|
|
|
- more at the end of the loop. */
|
|
|
- unsigned nbytes = 10 + (upper_bound > 1) * 10;
|
|
|
+ (unsigned char *p, unsigned char *end,
|
|
|
+ register_info_type * reg_info));
|
|
|
+static boolean common_op_match_null_string_p
|
|
|
+_RE_ARGS(
|
|
|
|
|
|
- GET_BUFFER_SPACE(nbytes);
|
|
|
+ (unsigned char **p, unsigned char *end,
|
|
|
+ register_info_type * reg_info));
|
|
|
+static int bcmp_translate
|
|
|
+_RE_ARGS((const char *s1, const char *s2, int len, char *translate));
|
|
|
|
|
|
-
|
|
|
- though it will be set during matching by its
|
|
|
- attendant `set_number_at' (inserted next),
|
|
|
- because `re_compile_fastmap' needs to know.
|
|
|
- Jump to the `jump_n' we might insert below. */
|
|
|
- INSERT_JUMP2(succeed_n, laststart,
|
|
|
- b + 5 + (upper_bound > 1) * 5,
|
|
|
- lower_bound);
|
|
|
- b += 5;
|
|
|
+
|
|
|
+ the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
|
|
|
+ and SIZE2, respectively). We start matching at POS, and stop
|
|
|
+ matching at STOP.
|
|
|
|
|
|
-
|
|
|
- before the `succeed_n'. The `5' is the last two
|
|
|
- bytes of this `set_number_at', plus 3 bytes of
|
|
|
- the following `succeed_n'. */
|
|
|
- insert_op2(set_number_at, laststart, 5,
|
|
|
- lower_bound, b);
|
|
|
- b += 5;
|
|
|
+   If REGS is non-null and the `no_sub' field of BUFP is zero, we
|
|
|
+ store offsets for the substring each group matched in REGS. See the
|
|
|
+ documentation for exactly how many groups we fill.
|
|
|
|
|
|
- if (upper_bound > 1) {
|
|
|
- append a backward jump to the `succeed_n'
|
|
|
- that starts this interval.
|
|
|
+ We return -1 if no match, -2 if an internal error (such as the
|
|
|
+ failure stack overflowing). Otherwise, we return the length of the
|
|
|
+ matched substring. */
|
|
|
|
|
|
- When we've reached this during matching,
|
|
|
- we'll have matched the interval once, so
|
|
|
- jump back only `upper_bound - 1' times. */
|
|
|
- STORE_JUMP2(jump_n, b, laststart + 5,
|
|
|
- upper_bound - 1);
|
|
|
- b += 5;
|
|
|
+int re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop)
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+const char *string1, *string2;
|
|
|
+int size1, size2;
|
|
|
+int pos;
|
|
|
+struct re_registers *regs;
|
|
|
+int stop;
|
|
|
+{
|
|
|
+ int result = re_match_2_internal(bufp, string1, size1, string2, size2,
|
|
|
+ pos, regs, stop);
|
|
|
|
|
|
-
|
|
|
- parameter of the `jump_n'; that is `b-2' as
|
|
|
- an absolute address. `laststart' will be
|
|
|
- the `set_number_at' we're about to insert;
|
|
|
- `laststart+3' the number to set, the source
|
|
|
- for the relative address. But we are
|
|
|
- inserting into the middle of the pattern --
|
|
|
- so everything is getting moved up by 5.
|
|
|
- Conclusion: (b - 2) - (laststart + 3) + 5,
|
|
|
- i.e., b - laststart.
|
|
|
+#ifndef REGEX_MALLOC
|
|
|
+# ifdef C_ALLOCA
|
|
|
+ alloca(0);
|
|
|
+# endif
|
|
|
+#endif
|
|
|
+ return result;
|
|
|
+}
|
|
|
|
|
|
- We insert this at the beginning of the loop
|
|
|
- so that if we fail during matching, we'll
|
|
|
- reinitialize the bounds. */
|
|
|
- insert_op2(set_number_at, laststart,
|
|
|
- b - laststart, upper_bound - 1, b);
|
|
|
- b += 5;
|
|
|
- }
|
|
|
- }
|
|
|
- pending_exact = 0;
|
|
|
- beg_interval = NULL;
|
|
|
- }
|
|
|
- break;
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__re_match_2, re_match_2)
|
|
|
+#endif
|
|
|
+
|
|
|
+ afterwards. */
|
|
|
+static int
|
|
|
+re_match_2_internal(bufp, string1, size1, string2, size2, pos, regs, stop)
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+const char *string1, *string2;
|
|
|
+int size1, size2;
|
|
|
+int pos;
|
|
|
+struct re_registers *regs;
|
|
|
+int stop;
|
|
|
+{
|
|
|
+
|
|
|
+ int mcnt;
|
|
|
+ unsigned char *p1;
|
|
|
|
|
|
- unfetch_interval:
|
|
|
-
|
|
|
- assert(beg_interval);
|
|
|
- p = beg_interval;
|
|
|
- beg_interval = NULL;
|
|
|
+
|
|
|
+ const char *end1, *end2;
|
|
|
|
|
|
-
|
|
|
- PATFETCH(c);
|
|
|
+
|
|
|
+ each to consider matching. */
|
|
|
+ const char *end_match_1, *end_match_2;
|
|
|
|
|
|
- if (!(syntax & RE_NO_BK_BRACES)) {
|
|
|
- if (p > pattern && p[-1] == '\\')
|
|
|
- goto normal_backslash;
|
|
|
- }
|
|
|
- goto normal_char;
|
|
|
+
|
|
|
+ const char *d, *dend;
|
|
|
|
|
|
-#ifdef emacs
|
|
|
-
|
|
|
- operators. rms says this is ok. --karl */
|
|
|
- case '=':
|
|
|
- BUF_PUSH(at_dot);
|
|
|
- break;
|
|
|
+
|
|
|
+ unsigned char *p = bufp->buffer;
|
|
|
+ register unsigned char *pend = p + bufp->used;
|
|
|
|
|
|
- case 's':
|
|
|
- laststart = b;
|
|
|
- PATFETCH(c);
|
|
|
- BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
|
|
|
- break;
|
|
|
+
|
|
|
+ empty subpattern when we get to the stop_memory. */
|
|
|
+ unsigned char *just_past_start_mem = 0;
|
|
|
|
|
|
- case 'S':
|
|
|
- laststart = b;
|
|
|
- PATFETCH(c);
|
|
|
- BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
|
|
|
- break;
|
|
|
-#endif
|
|
|
+
|
|
|
+ RE_TRANSLATE_TYPE translate = bufp->translate;
|
|
|
|
|
|
+
|
|
|
+ down the line pushes a failure point on this stack. It consists of
|
|
|
+ restart, regend, and reg_info for all registers corresponding to
|
|
|
+ the subexpressions we're currently inside, plus the number of such
|
|
|
+ registers, and, finally, two char *'s. The first char * is where
|
|
|
+ to resume scanning the pattern; the second one is where to resume
|
|
|
+ scanning the strings. If the latter is zero, the failure point is
|
|
|
+ a ``dummy''; if a failure happens and the failure point is a dummy,
|
|
|
+       it gets discarded and the next one is tried.  */
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+ fail_stack_type fail_stack;
|
|
|
+#endif
|
|
|
+#ifdef DEBUG
|
|
|
+ static unsigned failure_id;
|
|
|
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
|
|
|
+#endif
|
|
|
|
|
|
- case 'w':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- laststart = b;
|
|
|
- BUF_PUSH(wordchar);
|
|
|
- break;
|
|
|
+#ifdef REL_ALLOC
|
|
|
+
|
|
|
+ it is allocated relocatably. */
|
|
|
+ fail_stack_elt_t *failure_stack_ptr;
|
|
|
+#endif
|
|
|
|
|
|
+
|
|
|
+ return, for use in backreferences. The number here includes
|
|
|
+ an element for register zero. */
|
|
|
+ size_t num_regs = bufp->re_nsub + 1;
|
|
|
|
|
|
- case 'W':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- laststart = b;
|
|
|
- BUF_PUSH(notwordchar);
|
|
|
- break;
|
|
|
+
|
|
|
+ active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
|
|
|
+ active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
|
|
|
|
|
|
+
|
|
|
+ the input strings; they record just what was matched (on this
|
|
|
+ attempt) by a subexpression part of the pattern, that is, the
|
|
|
+       regnum-th regstart pointer points to where in the input we began
|
|
|
+ matching and the regnum-th regend points to right after where we
|
|
|
+ stopped matching the regnum-th subexpression. (The zeroth register
|
|
|
+ keeps track of what the whole pattern matches.) */
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+ const char **regstart, **regend;
|
|
|
+#endif
|
|
|
|
|
|
- case '<':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- BUF_PUSH(wordbeg);
|
|
|
- break;
|
|
|
+
|
|
|
+ match anything, then the register for its start will need to be
|
|
|
+ restored because it will have been set to wherever in the string we
|
|
|
+ are when we last see its open-group operator. Similarly for a
|
|
|
+ register's end. */
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+ const char **old_regstart, **old_regend;
|
|
|
+#endif
|
|
|
|
|
|
- case '>':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- BUF_PUSH(wordend);
|
|
|
- break;
|
|
|
+
|
|
|
+ nested) subexpressions we are currently in. The matched_something
|
|
|
+ field of reg_info[reg_num] helps us tell whether or not we have
|
|
|
+ matched any of the pattern so far this time through the reg_num-th
|
|
|
+ subexpression. These two fields get reset each time through any
|
|
|
+ loop their register is in. */
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+ register_info_type *reg_info;
|
|
|
+#endif
|
|
|
|
|
|
- case 'b':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- BUF_PUSH(wordbound);
|
|
|
- break;
|
|
|
+
|
|
|
+ variables when we find a match better than any we've seen before.
|
|
|
+ This happens as we backtrack through the failure points, which in
|
|
|
+ turn happens only if we have not yet matched the entire string. */
|
|
|
+ unsigned best_regs_set = false;
|
|
|
|
|
|
- case 'B':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- BUF_PUSH(notwordbound);
|
|
|
- break;
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+ const char **best_regstart, **best_regend;
|
|
|
+#endif
|
|
|
|
|
|
- case '`':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- BUF_PUSH(begbuf);
|
|
|
- break;
|
|
|
+
|
|
|
+ allocate space for that if we're not allocating space for anything
|
|
|
+ else (see below). Also, we never need info about register 0 for
|
|
|
+ any of the other register vectors, and it seems rather a kludge to
|
|
|
+ treat `best_regend' differently than the rest. So we keep track of
|
|
|
+ the end of the best match so far in a separate variable. We
|
|
|
+ initialize this to NULL so that when we backtrack the first time
|
|
|
+ and need to test it, it's not garbage. */
|
|
|
+ const char *match_end = NULL;
|
|
|
|
|
|
- case '\'':
|
|
|
- if (syntax & RE_NO_GNU_OPS)
|
|
|
- goto normal_char;
|
|
|
- BUF_PUSH(endbuf);
|
|
|
- break;
|
|
|
+
|
|
|
+ int set_regs_matched_done = 0;
|
|
|
|
|
|
- case '1':
|
|
|
- case '2':
|
|
|
- case '3':
|
|
|
- case '4':
|
|
|
- case '5':
|
|
|
- case '6':
|
|
|
- case '7':
|
|
|
- case '8':
|
|
|
- case '9':
|
|
|
- if (syntax & RE_NO_BK_REFS)
|
|
|
- goto normal_char;
|
|
|
+
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+ const char **reg_dummy;
|
|
|
+ register_info_type *reg_info_dummy;
|
|
|
+#endif
|
|
|
|
|
|
- c1 = c - '0';
|
|
|
+#ifdef DEBUG
|
|
|
+
|
|
|
+ unsigned num_regs_pushed = 0;
|
|
|
+#endif
|
|
|
|
|
|
- if (c1 > regnum)
|
|
|
- FREE_STACK_RETURN(REG_ESUBREG);
|
|
|
+ DEBUG_PRINT1("\n\nEntering re_match_2.\n");
|
|
|
|
|
|
-
|
|
|
- if (group_in_compile_stack(compile_stack, (regnum_t) c1))
|
|
|
- goto normal_char;
|
|
|
+ INIT_FAIL_STACK();
|
|
|
|
|
|
- laststart = b;
|
|
|
- BUF_PUSH_2(duplicate, c1);
|
|
|
- break;
|
|
|
+#ifdef MATCH_MAY_ALLOCATE
|
|
|
+
|
|
|
+ no groups in the pattern, as it takes a fair amount of time. If
|
|
|
+ there are groups, we include space for register 0 (the whole
|
|
|
+ pattern), even though we never use it, since it simplifies the
|
|
|
+ array indexing. We should fix this. */
|
|
|
+ if (bufp->re_nsub) {
|
|
|
+ regstart = REGEX_TALLOC(num_regs, const char *);
|
|
|
+ regend = REGEX_TALLOC(num_regs, const char *);
|
|
|
+ old_regstart = REGEX_TALLOC(num_regs, const char *);
|
|
|
+ old_regend = REGEX_TALLOC(num_regs, const char *);
|
|
|
+ best_regstart = REGEX_TALLOC(num_regs, const char *);
|
|
|
+ best_regend = REGEX_TALLOC(num_regs, const char *);
|
|
|
|
|
|
+ reg_info = REGEX_TALLOC(num_regs, register_info_type);
|
|
|
+ reg_dummy = REGEX_TALLOC(num_regs, const char *);
|
|
|
|
|
|
- case '+':
|
|
|
- case '?':
|
|
|
- if (syntax & RE_BK_PLUS_QM)
|
|
|
- goto handle_plus;
|
|
|
- else
|
|
|
- goto normal_backslash;
|
|
|
+ reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
|
|
|
|
|
|
- default:
|
|
|
- normal_backslash:
|
|
|
-
|
|
|
- not to translate; but if we don't translate it
|
|
|
- it will never match anything. */
|
|
|
- c = TRANSLATE(c);
|
|
|
- goto normal_char;
|
|
|
- }
|
|
|
- break;
|
|
|
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
|
|
|
+ && best_regstart && best_regend && reg_dummy
|
|
|
+ && reg_info_dummy)) {
|
|
|
+ FREE_VARIABLES();
|
|
|
+ return -2;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+
|
|
|
+ `FREE_VARIABLES' doesn't try to free them. */
|
|
|
+ regstart = regend = old_regstart = old_regend = best_regstart
|
|
|
+ = best_regend = reg_dummy = NULL;
|
|
|
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
|
|
|
+ }
|
|
|
+#endif
|
|
|
|
|
|
+
|
|
|
+ if (pos < 0 || pos > size1 + size2) {
|
|
|
+ FREE_VARIABLES();
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
|
|
|
- default:
|
|
|
-
|
|
|
- normal_char:
|
|
|
-
|
|
|
- if (!pending_exact
|
|
|
-
|
|
|
- || pending_exact + *pending_exact + 1 != b
|
|
|
-
|
|
|
- || *pending_exact == (1 << BYTEWIDTH) - 1
|
|
|
-
|
|
|
- || *p == '*' || *p == '^' || ((syntax & RE_BK_PLUS_QM)
|
|
|
- ? *p == '\\' && (p[1] == '+'
|
|
|
- || p[1] ==
|
|
|
- '?') : (*p
|
|
|
- ==
|
|
|
- '+'
|
|
|
- ||
|
|
|
- *p
|
|
|
- ==
|
|
|
- '?'))
|
|
|
- || ((syntax & RE_INTERVALS)
|
|
|
- && ((syntax & RE_NO_BK_BRACES)
|
|
|
- ? *p == '{' : (p[0] == '\\' && p[1] == '{')))) {
|
|
|
-
|
|
|
+
|
|
|
+ start_memory/stop_memory has been seen for. Also initialize the
|
|
|
+ register information struct. */
|
|
|
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
|
|
|
+ regstart[mcnt] = regend[mcnt]
|
|
|
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
|
|
|
|
|
|
- laststart = b;
|
|
|
+ REG_MATCH_NULL_STRING_P(reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
|
|
|
+ IS_ACTIVE(reg_info[mcnt]) = 0;
|
|
|
+ MATCHED_SOMETHING(reg_info[mcnt]) = 0;
|
|
|
+ EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
|
|
|
+ }
|
|
|
|
|
|
- BUF_PUSH_2(exactn, 0);
|
|
|
- pending_exact = b - 1;
|
|
|
- }
|
|
|
+
|
|
|
+ `string1' is null. */
|
|
|
+ if (size2 == 0 && string1 != NULL) {
|
|
|
+ string2 = string1;
|
|
|
+ size2 = size1;
|
|
|
+ string1 = 0;
|
|
|
+ size1 = 0;
|
|
|
+ }
|
|
|
+ end1 = string1 + size1;
|
|
|
+ end2 = string2 + size2;
|
|
|
|
|
|
- BUF_PUSH(c);
|
|
|
- (*pending_exact)++;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+ if (stop <= size1) {
|
|
|
+ end_match_1 = string1 + stop;
|
|
|
+ end_match_2 = string2;
|
|
|
+ } else {
|
|
|
+ end_match_1 = end1;
|
|
|
+ end_match_2 = string2 + stop - size1;
|
|
|
+ }
|
|
|
|
|
|
+
|
|
|
+ `dend' is the end of the input string that `d' points within. `d'
|
|
|
+ is advanced into the following input string whenever necessary, but
|
|
|
+ this happens before fetching; therefore, at the beginning of the
|
|
|
+ loop, `d' can be pointing at the end of a string, but it cannot
|
|
|
+ equal `string2'. */
|
|
|
+ if (size1 > 0 && pos <= size1) {
|
|
|
+ d = string1 + pos;
|
|
|
+ dend = end_match_1;
|
|
|
+ } else {
|
|
|
+ d = string2 + pos - size1;
|
|
|
+ dend = end_match_2;
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
+ DEBUG_PRINT1("The compiled pattern is:\n");
|
|
|
+ DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
|
|
|
+ DEBUG_PRINT1("The string to match is: `");
|
|
|
+ DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
|
|
|
+ DEBUG_PRINT1("'\n");
|
|
|
|
|
|
- if (fixup_alt_jump)
|
|
|
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
|
|
|
+
|
|
|
+ function if the match is complete, or it drops through if the match
|
|
|
+ fails at this starting point in the input data. */
|
|
|
+ for (;;) {
|
|
|
+#ifdef _LIBC
|
|
|
+ DEBUG_PRINT2("\n%p: ", p);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT2("\n0x%x: ", p);
|
|
|
+#endif
|
|
|
|
|
|
- if (!COMPILE_STACK_EMPTY)
|
|
|
- FREE_STACK_RETURN(REG_EPAREN);
|
|
|
+ if (p == pend) {
|
|
|
+ DEBUG_PRINT1("end of pattern ... ");
|
|
|
|
|
|
-
|
|
|
- the first time we reach the end of the compiled pattern. */
|
|
|
- if (syntax & RE_NO_POSIX_BACKTRACKING)
|
|
|
- BUF_PUSH(succeed);
|
|
|
+
|
|
|
+ longest match, try backtracking. */
|
|
|
+ if (d != end_match_2) {
|
|
|
+
|
|
|
+ as the best previous match. */
|
|
|
+ boolean same_str_p = (FIRST_STRING_P(match_end)
|
|
|
+ == MATCHING_IN_FIRST_STRING);
|
|
|
|
|
|
- free(compile_stack.stack);
|
|
|
+
|
|
|
+ boolean best_match_p;
|
|
|
|
|
|
-
|
|
|
- bufp->used = b - bufp->buffer;
|
|
|
+
|
|
|
+ with the previous declaration. */
|
|
|
+ if (same_str_p)
|
|
|
+ best_match_p = d > match_end;
|
|
|
+ else
|
|
|
+ best_match_p = !MATCHING_IN_FIRST_STRING;
|
|
|
|
|
|
-#ifdef DEBUG
|
|
|
- if (debug) {
|
|
|
- DEBUG_PRINT1("\nCompiled pattern: \n");
|
|
|
- print_compiled_pattern(bufp);
|
|
|
- }
|
|
|
-#endif
|
|
|
+ DEBUG_PRINT1("backtracking.\n");
|
|
|
|
|
|
-#ifndef MATCH_MAY_ALLOCATE
|
|
|
-
|
|
|
- isn't necessary unless we're trying to avoid calling alloca in
|
|
|
- the search and match routines. */
|
|
|
- {
|
|
|
- int num_regs = bufp->re_nsub + 1;
|
|
|
+ if (!FAIL_STACK_EMPTY()) {
|
|
|
|
|
|
-
|
|
|
- is strictly greater than re_max_failures, the largest possible stack
|
|
|
- is 2 * re_max_failures failure points. */
|
|
|
- if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
|
|
|
- fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
|
|
|
+
|
|
|
+ if (!best_regs_set || best_match_p) {
|
|
|
+ best_regs_set = true;
|
|
|
+ match_end = d;
|
|
|
|
|
|
-# ifdef emacs
|
|
|
- if (!fail_stack.stack)
|
|
|
- fail_stack.stack
|
|
|
- = (fail_stack_elt_t *) xmalloc(fail_stack.size
|
|
|
- *
|
|
|
- sizeof
|
|
|
- (fail_stack_elt_t));
|
|
|
- else
|
|
|
- fail_stack.stack =
|
|
|
- (fail_stack_elt_t *) xrealloc(fail_stack.stack,
|
|
|
- (fail_stack.size *
|
|
|
- sizeof
|
|
|
- (fail_stack_elt_t)));
|
|
|
-# else
|
|
|
- if (!fail_stack.stack)
|
|
|
- fail_stack.stack
|
|
|
- = (fail_stack_elt_t *) malloc(fail_stack.size
|
|
|
- *
|
|
|
- sizeof
|
|
|
- (fail_stack_elt_t));
|
|
|
- else
|
|
|
- fail_stack.stack =
|
|
|
- (fail_stack_elt_t *) realloc(fail_stack.stack,
|
|
|
- (fail_stack.size *
|
|
|
- sizeof
|
|
|
- (fail_stack_elt_t)));
|
|
|
-# endif
|
|
|
- }
|
|
|
+ DEBUG_PRINT1("\nSAVING match as best so far.\n");
|
|
|
|
|
|
- regex_grow_registers(num_regs);
|
|
|
- }
|
|
|
-#endif
|
|
|
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
|
|
|
+ best_regstart[mcnt] = regstart[mcnt];
|
|
|
+ best_regend[mcnt] = regend[mcnt];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
|
|
|
- return REG_NOERROR;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+ last match is real best match, don't restore second
|
|
|
+ best one. */
|
|
|
+ else if (best_regs_set && !best_match_p) {
|
|
|
+ restore_best_regs:
|
|
|
+
|
|
|
+ end_match_1' while the restored d is in string2.
|
|
|
+ For example, the pattern `x.*y.*z' against the
|
|
|
+ strings `x-' and `y-z-', if the two strings are
|
|
|
+ not consecutive in memory. */
|
|
|
+ DEBUG_PRINT1("Restoring best registers.\n");
|
|
|
|
|
|
-
|
|
|
+ d = match_end;
|
|
|
+ dend = ((d >= string1 && d <= end1)
|
|
|
+ ? end_match_1 : end_match_2);
|
|
|
|
|
|
-static void store_op1(op, loc, arg)
|
|
|
-re_opcode_t op;
|
|
|
-unsigned char *loc;
|
|
|
-int arg;
|
|
|
-{
|
|
|
- *loc = (unsigned char) op;
|
|
|
- STORE_NUMBER(loc + 1, arg);
|
|
|
-}
|
|
|
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
|
|
|
+ regstart[mcnt] = best_regstart[mcnt];
|
|
|
+ regend[mcnt] = best_regend[mcnt];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ succeed_label:
|
|
|
+ DEBUG_PRINT1("Accepting match.\n");
|
|
|
|
|
|
+
|
|
|
+ if (regs && !bufp->no_sub) {
|
|
|
+
|
|
|
+ if (bufp->regs_allocated == REGS_UNALLOCATED) {
|
|
|
+ extra element beyond `num_regs' for the `-1' marker
|
|
|
+ GNU code uses. */
|
|
|
+ regs->num_regs = MAX(RE_NREGS, num_regs + 1);
|
|
|
+ regs->start = TALLOC(regs->num_regs, regoff_t);
|
|
|
+ regs->end = TALLOC(regs->num_regs, regoff_t);
|
|
|
+ if (regs->start == NULL || regs->end == NULL) {
|
|
|
+ FREE_VARIABLES();
|
|
|
+ return -2;
|
|
|
+ }
|
|
|
+ bufp->regs_allocated = REGS_REALLOCATE;
|
|
|
+ } else if (bufp->regs_allocated == REGS_REALLOCATE) {
|
|
|
+ allocated, reallocate them. If we need fewer, just
|
|
|
+ leave it alone. */
|
|
|
+ if (regs->num_regs < num_regs + 1) {
|
|
|
+ regs->num_regs = num_regs + 1;
|
|
|
+ RETALLOC(regs->start, regs->num_regs, regoff_t);
|
|
|
+ RETALLOC(regs->end, regs->num_regs, regoff_t);
|
|
|
+ if (regs->start == NULL || regs->end == NULL) {
|
|
|
+ FREE_VARIABLES();
|
|
|
+ return -2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+
|
|
|
+ warning under GCC when assert expands to nothing. */
|
|
|
+ assert(bufp->regs_allocated == REGS_FIXED);
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
+
|
|
|
+ indices. Register zero has to be set differently,
|
|
|
+ since we haven't kept track of any info for it. */
|
|
|
+ if (regs->num_regs > 0) {
|
|
|
+ regs->start[0] = pos;
|
|
|
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
|
|
|
+ ? ((regoff_t) (d - string1))
|
|
|
+ : ((regoff_t) (d - string2 + size1)));
|
|
|
+ }
|
|
|
|
|
|
-static void store_op2(op, loc, arg1, arg2)
|
|
|
-re_opcode_t op;
|
|
|
-unsigned char *loc;
|
|
|
-int arg1, arg2;
|
|
|
-{
|
|
|
- *loc = (unsigned char) op;
|
|
|
- STORE_NUMBER(loc + 1, arg1);
|
|
|
- STORE_NUMBER(loc + 3, arg2);
|
|
|
-}
|
|
|
+
|
|
|
+ registers, since that is all we initialized. */
|
|
|
+ for (mcnt = 1;
|
|
|
+ (unsigned) mcnt < MIN(num_regs, regs->num_regs);
|
|
|
+ mcnt++) {
|
|
|
+ if (REG_UNSET(regstart[mcnt])
|
|
|
+ || REG_UNSET(regend[mcnt])) regs->start[mcnt] =
|
|
|
+ regs->end[mcnt] = -1;
|
|
|
+ else {
|
|
|
+ regs->start[mcnt]
|
|
|
+ = (regoff_t) POINTER_TO_OFFSET(regstart[mcnt]);
|
|
|
+ regs->end[mcnt]
|
|
|
+ = (regoff_t) POINTER_TO_OFFSET(regend[mcnt]);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
+
|
|
|
+ were in the pattern, set the extra elements to -1. If
|
|
|
+ we (re)allocated the registers, this is the case,
|
|
|
+ because we always allocate enough to have at least one
|
|
|
+ -1 at the end. */
|
|
|
+ for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs;
|
|
|
+ mcnt++)
|
|
|
+ regs->start[mcnt] = regs->end[mcnt] = -1;
|
|
|
+ }
|
|
|
+
|
|
|
+ DEBUG_PRINT4
|
|
|
+ ("%u failure points pushed, %u popped (%u remain).\n",
|
|
|
+ nfailure_points_pushed, nfailure_points_popped,
|
|
|
+ nfailure_points_pushed - nfailure_points_popped);
|
|
|
+ DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
|
|
|
|
|
|
-
|
|
|
- for OP followed by two-byte integer parameter ARG. */
|
|
|
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
|
|
|
+ ? string1 : string2 - size1);
|
|
|
|
|
|
-static void insert_op1(op, loc, arg, end)
|
|
|
-re_opcode_t op;
|
|
|
-unsigned char *loc;
|
|
|
-int arg;
|
|
|
-unsigned char *end;
|
|
|
-{
|
|
|
- register unsigned char *pfrom = end;
|
|
|
- register unsigned char *pto = end + 3;
|
|
|
+ DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
|
|
|
|
|
|
- while (pfrom != loc)
|
|
|
- *--pto = *--pfrom;
|
|
|
+ FREE_VARIABLES();
|
|
|
+ return mcnt;
|
|
|
+ }
|
|
|
|
|
|
- store_op1(op, loc, arg);
|
|
|
-}
|
|
|
+
|
|
|
+ switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
|
|
|
+
|
|
|
+ currently have n == 0. */
|
|
|
+ case no_op:
|
|
|
+ DEBUG_PRINT1("EXECUTING no_op.\n");
|
|
|
+ break;
|
|
|
|
|
|
+ case succeed:
|
|
|
+ DEBUG_PRINT1("EXECUTING succeed.\n");
|
|
|
+ goto succeed_label;
|
|
|
|
|
|
-
|
|
|
+
|
|
|
+ byte in the pattern defines n, and the n bytes after that
|
|
|
+ are the characters to match. */
|
|
|
+ case exactn:
|
|
|
+ mcnt = *p++;
|
|
|
+ DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
|
|
|
|
|
|
-static void insert_op2(op, loc, arg1, arg2, end)
|
|
|
-re_opcode_t op;
|
|
|
-unsigned char *loc;
|
|
|
-int arg1, arg2;
|
|
|
-unsigned char *end;
|
|
|
-{
|
|
|
- register unsigned char *pfrom = end;
|
|
|
- register unsigned char *pto = end + 5;
|
|
|
+
|
|
|
+ testing `translate' inside the loop. */
|
|
|
+ if (translate) {
|
|
|
+ do {
|
|
|
+ PREFETCH();
|
|
|
+ if ((unsigned char) translate[(unsigned char) *d++]
|
|
|
+ != (unsigned char) *p++)
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+ while (--mcnt);
|
|
|
+ } else {
|
|
|
+ do {
|
|
|
+ PREFETCH();
|
|
|
+ if (*d++ != (char) *p++)
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+ while (--mcnt);
|
|
|
+ }
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ break;
|
|
|
|
|
|
- while (pfrom != loc)
|
|
|
- *--pto = *--pfrom;
|
|
|
|
|
|
- store_op2(op, loc, arg1, arg2);
|
|
|
-}
|
|
|
+
|
|
|
+ case anychar:
|
|
|
+ DEBUG_PRINT1("EXECUTING anychar.\n");
|
|
|
|
|
|
+ PREFETCH();
|
|
|
|
|
|
-
|
|
|
- after an alternative or a begin-subexpression. We assume there is at
|
|
|
- least one character before the ^. */
|
|
|
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE(*d) == '\n')
|
|
|
+ || (bufp->syntax & RE_DOT_NOT_NULL
|
|
|
+ && TRANSLATE(*d) == '\000')) goto fail;
|
|
|
|
|
|
-static boolean at_begline_loc_p(pattern, p, syntax)
|
|
|
-const char *pattern, *p;
|
|
|
-reg_syntax_t syntax;
|
|
|
-{
|
|
|
- const char *prev = p - 2;
|
|
|
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ DEBUG_PRINT2(" Matched `%d'.\n", *d);
|
|
|
+ d++;
|
|
|
+ break;
|
|
|
|
|
|
- return
|
|
|
-
|
|
|
- (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
|
|
|
-
|
|
|
- || (*prev == '|'
|
|
|
- && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
|
|
|
-}
|
|
|
|
|
|
+ case charset:
|
|
|
+ case charset_not:
|
|
|
+ {
|
|
|
+ register unsigned char c;
|
|
|
+ boolean not = (re_opcode_t) * (p - 1) == charset_not;
|
|
|
+
|
|
|
+ DEBUG_PRINT2("EXECUTING charset%s.\n", not ? "_not" : "");
|
|
|
|
|
|
-
|
|
|
- at least one character after the $, i.e., `P < PEND'. */
|
|
|
+ PREFETCH();
|
|
|
+ c = TRANSLATE(*d);
|
|
|
|
|
|
-static boolean at_endline_loc_p(p, pend, syntax)
|
|
|
-const char *p, *pend;
|
|
|
-reg_syntax_t syntax;
|
|
|
-{
|
|
|
- const char *next = p;
|
|
|
- boolean next_backslash = *next == '\\';
|
|
|
- const char *next_next = p + 1 < pend ? p + 1 : 0;
|
|
|
+
|
|
|
+ bit list is a full 32 bytes long. */
|
|
|
+ if (c < (unsigned) (*p * BYTEWIDTH)
|
|
|
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
|
|
|
+ not = !not;
|
|
|
|
|
|
- return
|
|
|
-
|
|
|
- (syntax & RE_NO_BK_PARENS ? *next == ')'
|
|
|
- : next_backslash && next_next && *next_next == ')')
|
|
|
-
|
|
|
- || (syntax & RE_NO_BK_VBAR ? *next == '|'
|
|
|
- : next_backslash && next_next && *next_next == '|');
|
|
|
-}
|
|
|
+ p += 1 + *p;
|
|
|
|
|
|
+ if (!not)
|
|
|
+ goto fail;
|
|
|
|
|
|
-
|
|
|
- false if it's not. */
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ d++;
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
-static boolean group_in_compile_stack(compile_stack, regnum)
|
|
|
-compile_stack_type compile_stack;
|
|
|
-regnum_t regnum;
|
|
|
-{
|
|
|
- int this_element;
|
|
|
|
|
|
- for (this_element = compile_stack.avail - 1;
|
|
|
- this_element >= 0; this_element--)
|
|
|
- if (compile_stack.stack[this_element].regnum == regnum)
|
|
|
- return true;
|
|
|
+
|
|
|
+ The arguments are the register number in the next byte, and the
|
|
|
+ number of groups inner to this one in the next. The text
|
|
|
+ matched within the group is recorded (in the internal
|
|
|
+ registers data structure) under the register number. */
|
|
|
+ case start_memory:
|
|
|
+ DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p, p[1]);
|
|
|
|
|
|
- return false;
|
|
|
-}
|
|
|
+
|
|
|
+ p1 = p;
|
|
|
|
|
|
+ if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
|
|
|
+ MATCH_NULL_UNSET_VALUE)
|
|
|
+ REG_MATCH_NULL_STRING_P(reg_info[*p]) =
|
|
|
+ group_match_null_string_p(&p1, pend, reg_info);
|
|
|
|
|
|
-
|
|
|
- uncompiled pattern *P_PTR (which ends at PEND). We assume the
|
|
|
- starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
|
|
|
- Then we set the translation of all bits between the starting and
|
|
|
- ending characters (inclusive) in the compiled pattern B.
|
|
|
+
|
|
|
+ we were at this open-group operator in case the group is
|
|
|
+ operated upon by a repetition operator, e.g., with `(a*)*b'
|
|
|
+ against `ab'; then we want to ignore where we are now in
|
|
|
+ the string in case this attempt to match fails. */
|
|
|
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
|
|
|
+ ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
|
|
|
+ : regstart[*p];
|
|
|
+ DEBUG_PRINT2(" old_regstart: %d\n",
|
|
|
+ POINTER_TO_OFFSET(old_regstart[*p]));
|
|
|
|
|
|
- Return an error code.
|
|
|
+ regstart[*p] = d;
|
|
|
+ DEBUG_PRINT2(" regstart: %d\n",
|
|
|
+ POINTER_TO_OFFSET(regstart[*p]));
|
|
|
|
|
|
- We use these short variable names so we can use the same macros as
|
|
|
- `regex_compile' itself. */
|
|
|
+ IS_ACTIVE(reg_info[*p]) = 1;
|
|
|
+ MATCHED_SOMETHING(reg_info[*p]) = 0;
|
|
|
|
|
|
-static reg_errcode_t compile_range(p_ptr, pend, translate, syntax, b)
|
|
|
-const char **p_ptr, *pend;
|
|
|
-RE_TRANSLATE_TYPE translate;
|
|
|
-reg_syntax_t syntax;
|
|
|
-unsigned char *b;
|
|
|
-{
|
|
|
- unsigned this_char;
|
|
|
+
|
|
|
+ set_regs_matched_done = 0;
|
|
|
|
|
|
- const char *p = *p_ptr;
|
|
|
- reg_errcode_t ret;
|
|
|
- char range_start[2];
|
|
|
- char range_end[2];
|
|
|
- char ch[2];
|
|
|
+
|
|
|
+ highest_active_reg = *p;
|
|
|
|
|
|
- if (p == pend)
|
|
|
- return REG_ERANGE;
|
|
|
+
|
|
|
+ register. */
|
|
|
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
|
|
|
+ lowest_active_reg = *p;
|
|
|
|
|
|
-
|
|
|
- appropriate translation is done in the bit-setting loop below. */
|
|
|
- range_start[0] = p[-2];
|
|
|
- range_start[1] = '\0';
|
|
|
- range_end[0] = p[0];
|
|
|
- range_end[1] = '\0';
|
|
|
+
|
|
|
+ p += 2;
|
|
|
+ just_past_start_mem = p;
|
|
|
|
|
|
-
|
|
|
- caller isn't still at the ending character. */
|
|
|
- (*p_ptr)++;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
|
|
|
|
|
|
-
|
|
|
- char' -- we would otherwise go into an infinite loop, since all
|
|
|
- characters <= 0xff. */
|
|
|
- ch[1] = '\0';
|
|
|
- for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) {
|
|
|
- ch[0] = this_char;
|
|
|
- if (strcoll(range_start, ch) <= 0 && strcoll(ch, range_end) <= 0) {
|
|
|
- SET_LIST_BIT(TRANSLATE(this_char));
|
|
|
- ret = REG_NOERROR;
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+ arguments are the same as start_memory's: the register
|
|
|
+ number, and the number of inner groups. */
|
|
|
+ case stop_memory:
|
|
|
+ DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
|
|
|
|
|
|
- return ret;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
- BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
|
|
|
- characters can start a string that matches the pattern. This fastmap
|
|
|
- is used by re_search to skip quickly over impossible starting points.
|
|
|
+
|
|
|
+ this close-group operator in case the group is operated
|
|
|
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
|
|
|
+ against `aba'; then we want to ignore where we are now in
|
|
|
+ the string in case this attempt to match fails. */
|
|
|
+ old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
|
|
|
+ ? REG_UNSET(regend[*p]) ? d : regend[*p]
|
|
|
+ : regend[*p];
|
|
|
+ DEBUG_PRINT2(" old_regend: %d\n",
|
|
|
+ POINTER_TO_OFFSET(old_regend[*p]));
|
|
|
|
|
|
- The caller must supply the address of a (1 << BYTEWIDTH)-byte data
|
|
|
- area as BUFP->fastmap.
|
|
|
+ regend[*p] = d;
|
|
|
+ DEBUG_PRINT2(" regend: %d\n",
|
|
|
+ POINTER_TO_OFFSET(regend[*p]));
|
|
|
|
|
|
- We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
|
|
|
- the pattern buffer.
|
|
|
+
|
|
|
+ IS_ACTIVE(reg_info[*p]) = 0;
|
|
|
|
|
|
- Returns 0 if we succeed, -2 if an internal error. */
|
|
|
+
|
|
|
+ set_regs_matched_done = 0;
|
|
|
|
|
|
-int re_compile_fastmap(bufp)
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-{
|
|
|
- int j, k;
|
|
|
+
|
|
|
+ anymore. */
|
|
|
+ if (lowest_active_reg == highest_active_reg) {
|
|
|
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
|
|
|
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
|
|
|
+ } else {
|
|
|
+ it isn't necessarily one less than now: consider
|
|
|
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
|
|
|
+ new highest active register is 1. */
|
|
|
+ unsigned char r = *p - 1;
|
|
|
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
- fail_stack_type fail_stack;
|
|
|
-#endif
|
|
|
-#ifndef REGEX_MALLOC
|
|
|
- char *destination;
|
|
|
-#endif
|
|
|
+ while (r > 0 && !IS_ACTIVE(reg_info[r]))
|
|
|
+ r--;
|
|
|
|
|
|
- register char *fastmap = bufp->fastmap;
|
|
|
- unsigned char *pattern = bufp->buffer;
|
|
|
- unsigned char *p = pattern;
|
|
|
- register unsigned char *pend = pattern + bufp->used;
|
|
|
+
|
|
|
+ the registers as the result of an `on_failure_jump', not
|
|
|
+ a `start_memory', and we jumped to past the innermost
|
|
|
+ `stop_memory'. For example, in ((.)*) we save
|
|
|
+ registers 1 and 2 as a result of the *, but when we pop
|
|
|
+ back to the second ), we are at the stop_memory 1.
|
|
|
+ Thus, nothing is active. */
|
|
|
+ if (r == 0) {
|
|
|
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
|
|
|
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
|
|
|
+ } else
|
|
|
+ highest_active_reg = r;
|
|
|
+ }
|
|
|
|
|
|
-#ifdef REL_ALLOC
|
|
|
-
|
|
|
- it is allocated relocatably. */
|
|
|
- fail_stack_elt_t *failure_stack_ptr;
|
|
|
-#endif
|
|
|
+
|
|
|
+ group that's operated on by a repetition operator, try to
|
|
|
+ force exit from the ``loop'', and restore the register
|
|
|
+ information for this group that we had before trying this
|
|
|
+ last match. */
|
|
|
+ if ((!MATCHED_SOMETHING(reg_info[*p])
|
|
|
+ || just_past_start_mem == p - 1)
|
|
|
+ && (p + 2) < pend) {
|
|
|
+ boolean is_a_jump_n = false;
|
|
|
|
|
|
-
|
|
|
- proven otherwise. We set this false at the bottom of switch
|
|
|
- statement, to which we get only if a particular path doesn't
|
|
|
- match the empty string. */
|
|
|
- boolean path_can_be_null = true;
|
|
|
+ p1 = p + 2;
|
|
|
+ mcnt = 0;
|
|
|
+ switch ((re_opcode_t) * p1++) {
|
|
|
+ case jump_n:
|
|
|
+ is_a_jump_n = true;
|
|
|
+ case pop_failure_jump:
|
|
|
+ case maybe_pop_jump:
|
|
|
+ case jump:
|
|
|
+ case dummy_failure_jump:
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ if (is_a_jump_n)
|
|
|
+ p1 += 2;
|
|
|
+ break;
|
|
|
+
|
|
|
+ default:
|
|
|
+ ;
|
|
|
+ }
|
|
|
+ p1 += mcnt;
|
|
|
+
|
|
|
+
|
|
|
+ to an on_failure_jump right before the start_memory
|
|
|
+ corresponding to this stop_memory, exit from the loop
|
|
|
+ by forcing a failure after pushing on the stack the
|
|
|
+ on_failure_jump's jump in the pattern, and d. */
|
|
|
+ if (mcnt < 0 && (re_opcode_t) * p1 == on_failure_jump
|
|
|
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) {
|
|
|
+
|
|
|
+ what its registers were before trying this last
|
|
|
+ failed match, e.g., with `(a*)*b' against `ab' for
|
|
|
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
|
|
|
+ against `aba' for regend[3].
|
|
|
+
|
|
|
+ Also restore the registers for inner groups for,
|
|
|
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
|
|
|
+ otherwise get trashed). */
|
|
|
|
|
|
-
|
|
|
- boolean succeed_n_p = false;
|
|
|
+ if (EVER_MATCHED_SOMETHING(reg_info[*p])) {
|
|
|
+ unsigned r;
|
|
|
|
|
|
- assert(fastmap != NULL && p != NULL);
|
|
|
+ EVER_MATCHED_SOMETHING(reg_info[*p]) = 0;
|
|
|
|
|
|
- INIT_FAIL_STACK();
|
|
|
- bzero(fastmap, 1 << BYTEWIDTH);
|
|
|
- bufp->fastmap_accurate = 1;
|
|
|
- bufp->can_be_null = 0;
|
|
|
+
|
|
|
+ for (r = *p;
|
|
|
+ r < (unsigned) *p + (unsigned) *(p + 1); r++) {
|
|
|
+ regstart[r] = old_regstart[r];
|
|
|
|
|
|
- while (1) {
|
|
|
- if (p == pend || *p == succeed) {
|
|
|
-
|
|
|
- if (!FAIL_STACK_EMPTY()) {
|
|
|
- bufp->can_be_null |= path_can_be_null;
|
|
|
+
|
|
|
+ if (old_regend[r] >= regstart[r])
|
|
|
+ regend[r] = old_regend[r];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ p1++;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
|
|
|
|
|
|
-
|
|
|
- path_can_be_null = true;
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- p = fail_stack.stack[--fail_stack.avail].pointer;
|
|
|
+
|
|
|
+ p += 2;
|
|
|
+ break;
|
|
|
|
|
|
- continue;
|
|
|
- } else
|
|
|
- break;
|
|
|
- }
|
|
|
|
|
|
-
|
|
|
- assert(p < pend);
|
|
|
+
|
|
|
+ followed by the numeric value of <digit> as the register number. */
|
|
|
+ case duplicate:
|
|
|
+ {
|
|
|
+ register const char *d2, *dend2;
|
|
|
+ int regno = *p++;
|
|
|
|
|
|
- switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
|
|
|
+ DEBUG_PRINT2("EXECUTING duplicate %d.\n", regno);
|
|
|
|
|
|
-
|
|
|
- if a backreference is used, since it's too hard to figure out
|
|
|
- the fastmap for the corresponding group. Setting
|
|
|
- `can_be_null' stops `re_search_2' from using the fastmap, so
|
|
|
- that is all we do. */
|
|
|
- case duplicate:
|
|
|
- bufp->can_be_null = 1;
|
|
|
- goto done;
|
|
|
+
|
|
|
+ if (REG_UNSET(regstart[regno]) || REG_UNSET(regend[regno]))
|
|
|
+ goto fail;
|
|
|
|
|
|
+
|
|
|
+ d2 = regstart[regno];
|
|
|
|
|
|
-
|
|
|
- with `break'. */
|
|
|
+
|
|
|
+ the place to stop matching are in the same string, then
|
|
|
+ set to the place to stop, otherwise, for now have to use
|
|
|
+ the end of the first string. */
|
|
|
|
|
|
- case exactn:
|
|
|
- fastmap[p[1]] = 1;
|
|
|
- break;
|
|
|
+ dend2 = ((FIRST_STRING_P(regstart[regno])
|
|
|
+ == FIRST_STRING_P(regend[regno]))
|
|
|
+ ? regend[regno] : end_match_1);
|
|
|
+ for (;;) {
|
|
|
+
|
|
|
+ contents. */
|
|
|
+ while (d2 == dend2) {
|
|
|
+ if (dend2 == end_match_2)
|
|
|
+ break;
|
|
|
+ if (dend2 == regend[regno])
|
|
|
+ break;
|
|
|
|
|
|
+
|
|
|
+ d2 = string2;
|
|
|
+ dend2 = regend[regno];
|
|
|
+ }
|
|
|
+
|
|
|
+ if (d2 == dend2)
|
|
|
+ break;
|
|
|
|
|
|
- case charset:
|
|
|
- for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
|
|
|
- if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
|
|
|
- fastmap[j] = 1;
|
|
|
- break;
|
|
|
+
|
|
|
+ PREFETCH();
|
|
|
|
|
|
+
|
|
|
+ mcnt = dend - d;
|
|
|
|
|
|
- case charset_not:
|
|
|
-
|
|
|
- for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
|
|
|
- fastmap[j] = 1;
|
|
|
+
|
|
|
+ one shot, so, if necessary, adjust the count. */
|
|
|
+ if (mcnt > dend2 - d2)
|
|
|
+ mcnt = dend2 - d2;
|
|
|
|
|
|
- for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
|
|
|
- if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
|
|
|
- fastmap[j] = 1;
|
|
|
+
|
|
|
+ past them. */
|
|
|
+ if (translate ? bcmp_translate(d, d2, mcnt, translate)
|
|
|
+ : memcmp(d, d2, mcnt))
|
|
|
+ goto fail;
|
|
|
+ d += mcnt, d2 += mcnt;
|
|
|
+
|
|
|
+
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ }
|
|
|
+ }
|
|
|
break;
|
|
|
|
|
|
|
|
|
- case wordchar:
|
|
|
- for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
- if (SYNTAX(j) == Sword)
|
|
|
- fastmap[j] = 1;
|
|
|
- break;
|
|
|
+
|
|
|
+ (unless `not_bol' is set in `bufp'), and, if
|
|
|
+ `newline_anchor' is set, after newlines. */
|
|
|
+ case begline:
|
|
|
+ DEBUG_PRINT1("EXECUTING begline.\n");
|
|
|
|
|
|
+ if (AT_STRINGS_BEG(d)) {
|
|
|
+ if (!bufp->not_bol)
|
|
|
+ break;
|
|
|
+ } else if (d[-1] == '\n' && bufp->newline_anchor) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ goto fail;
|
|
|
|
|
|
- case notwordchar:
|
|
|
- for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
- if (SYNTAX(j) != Sword)
|
|
|
- fastmap[j] = 1;
|
|
|
- break;
|
|
|
|
|
|
+
|
|
|
+ case endline:
|
|
|
+ DEBUG_PRINT1("EXECUTING endline.\n");
|
|
|
|
|
|
- case anychar:
|
|
|
- {
|
|
|
- int fastmap_newline = fastmap['\n'];
|
|
|
+ if (AT_STRINGS_END(d)) {
|
|
|
+ if (!bufp->not_eol)
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
- fastmap[j] = 1;
|
|
|
+
|
|
|
+ else if ((d == end1 ? *string2 : *d) == '\n'
|
|
|
+ && bufp->newline_anchor) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ goto fail;
|
|
|
|
|
|
-
|
|
|
- if (!(bufp->syntax & RE_DOT_NEWLINE))
|
|
|
- fastmap['\n'] = fastmap_newline;
|
|
|
|
|
|
-
|
|
|
- then the fastmap is irrelevant. Something's wrong here. */
|
|
|
- else if (bufp->can_be_null)
|
|
|
- goto done;
|
|
|
+
|
|
|
+ case begbuf:
|
|
|
+ DEBUG_PRINT1("EXECUTING begbuf.\n");
|
|
|
+ if (AT_STRINGS_BEG(d))
|
|
|
+ break;
|
|
|
+ goto fail;
|
|
|
|
|
|
-
|
|
|
- break;
|
|
|
- }
|
|
|
|
|
|
-#ifdef emacs
|
|
|
- case syntaxspec:
|
|
|
- k = *p++;
|
|
|
- for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
- if (SYNTAX(j) == (enum syntaxcode) k)
|
|
|
- fastmap[j] = 1;
|
|
|
- break;
|
|
|
+
|
|
|
+ case endbuf:
|
|
|
+ DEBUG_PRINT1("EXECUTING endbuf.\n");
|
|
|
+ if (AT_STRINGS_END(d))
|
|
|
+ break;
|
|
|
+ goto fail;
|
|
|
|
|
|
|
|
|
- case notsyntaxspec:
|
|
|
- k = *p++;
|
|
|
- for (j = 0; j < (1 << BYTEWIDTH); j++)
|
|
|
- if (SYNTAX(j) != (enum syntaxcode) k)
|
|
|
- fastmap[j] = 1;
|
|
|
- break;
|
|
|
+
|
|
|
+ pushes NULL as the value for the string on the stack. Then
|
|
|
+ `pop_failure_point' will keep the current value for the
|
|
|
+ string, instead of restoring it. To see why, consider
|
|
|
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
|
|
|
+ then the . fails against the \n. But the next thing we want
|
|
|
+ to do is match the \n against the \n; if we restored the
|
|
|
+ string value, we would be back at the foo.
|
|
|
+
|
|
|
+ Because this is used only in specific cases, we don't need to
|
|
|
+ check all the things that `on_failure_jump' does, to make
|
|
|
+ sure the right things get saved on the stack. Hence we don't
|
|
|
+ share its code. The only reason to push anything on the
|
|
|
+ stack at all is that otherwise we would have to change
|
|
|
+ `anychar's code to do something besides goto fail in this
|
|
|
+ case; that seems worse than this. */
|
|
|
+ case on_failure_keep_string_jump:
|
|
|
+ DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
|
|
|
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
+#ifdef _LIBC
|
|
|
+ DEBUG_PRINT3(" %d (to %p):\n", mcnt, p + mcnt);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT3(" %d (to 0x%x):\n", mcnt, p + mcnt);
|
|
|
+#endif
|
|
|
|
|
|
-
|
|
|
- `continue'. */
|
|
|
+ PUSH_FAILURE_POINT(p + mcnt, NULL, -2);
|
|
|
+ break;
|
|
|
|
|
|
|
|
|
- case before_dot:
|
|
|
- case at_dot:
|
|
|
- case after_dot:
|
|
|
- continue;
|
|
|
-#endif
|
|
|
+
|
|
|
|
|
|
+ Each alternative starts with an on_failure_jump that points
|
|
|
+ to the beginning of the next alternative. Each alternative
|
|
|
+ except the last ends with a jump that in effect jumps past
|
|
|
+ the rest of the alternatives. (They really jump to the
|
|
|
+ ending jump of the following alternative, because tensioning
|
|
|
+ these jumps is a hassle.)
|
|
|
|
|
|
- case no_op:
|
|
|
- case begline:
|
|
|
- case endline:
|
|
|
- case begbuf:
|
|
|
- case endbuf:
|
|
|
- case wordbound:
|
|
|
- case notwordbound:
|
|
|
- case wordbeg:
|
|
|
- case wordend:
|
|
|
- case push_dummy_failure:
|
|
|
- continue;
|
|
|
+ Repeats start with an on_failure_jump that points past both
|
|
|
+ the repetition text and either the following jump or
|
|
|
+ pop_failure_jump back to this on_failure_jump. */
|
|
|
+ case on_failure_jump:
|
|
|
+ on_failure:
|
|
|
+ DEBUG_PRINT1("EXECUTING on_failure_jump");
|
|
|
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
+#ifdef _LIBC
|
|
|
+ DEBUG_PRINT3(" %d (to %p)", mcnt, p + mcnt);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT3(" %d (to 0x%x)", mcnt, p + mcnt);
|
|
|
+#endif
|
|
|
|
|
|
- case jump_n:
|
|
|
- case pop_failure_jump:
|
|
|
- case maybe_pop_jump:
|
|
|
- case jump:
|
|
|
- case jump_past_alt:
|
|
|
- case dummy_failure_jump:
|
|
|
- EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
- p += j;
|
|
|
- if (j > 0)
|
|
|
- continue;
|
|
|
+
|
|
|
+ the original * applied to a group), save the information
|
|
|
+ for that group and all inner ones, so that if we fail back
|
|
|
+ to this point, the group's information will be correct.
|
|
|
+ For example, in \(a*\)*\1, we need the preceding group,
|
|
|
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
|
|
|
|
|
|
-
|
|
|
- loop and matched nothing. Opcode jumped to should be
|
|
|
- `on_failure_jump' or `succeed_n'. Just treat it like an
|
|
|
- ordinary jump. For a * loop, it has pushed its failure
|
|
|
- point already; if so, discard that as redundant. */
|
|
|
- if ((re_opcode_t) * p != on_failure_jump
|
|
|
- && (re_opcode_t) * p != succeed_n)
|
|
|
- continue;
|
|
|
+
|
|
|
+ a failure point to `p + mcnt' after we do this. */
|
|
|
+ p1 = p;
|
|
|
|
|
|
- p++;
|
|
|
- EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
- p += j;
|
|
|
+
|
|
|
+ start_memory in case this on_failure_jump is happening as
|
|
|
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
|
|
|
+ against aba. */
|
|
|
+ while (p1 < pend && (re_opcode_t) * p1 == no_op)
|
|
|
+ p1++;
|
|
|
|
|
|
-
|
|
|
- if (!FAIL_STACK_EMPTY()
|
|
|
- && fail_stack.stack[fail_stack.avail - 1].pointer == p)
|
|
|
- fail_stack.avail--;
|
|
|
+ if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
|
|
|
+
|
|
|
+ get reset at the start_memory we are about to get to,
|
|
|
+ but we will have saved all the registers relevant to
|
|
|
+ this repetition op, as described above. */
|
|
|
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
|
|
|
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
|
|
|
+ lowest_active_reg = *(p1 + 1);
|
|
|
+ }
|
|
|
|
|
|
- continue;
|
|
|
+ DEBUG_PRINT1(":\n");
|
|
|
+ PUSH_FAILURE_POINT(p + mcnt, d, -2);
|
|
|
+ break;
|
|
|
|
|
|
|
|
|
- case on_failure_jump:
|
|
|
- case on_failure_keep_string_jump:
|
|
|
- handle_on_failure_jump:
|
|
|
- EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
+
|
|
|
+ We change it to either `pop_failure_jump' or `jump'. */
|
|
|
+ case maybe_pop_jump:
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
+ DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
|
|
|
+ {
|
|
|
+ register unsigned char *p2 = p;
|
|
|
|
|
|
-
|
|
|
- end of the pattern. We don't want to push such a point,
|
|
|
- since when we restore it above, entering the switch will
|
|
|
- increment `p' past the end of the pattern. We don't need
|
|
|
- to push such a point since we obviously won't find any more
|
|
|
- fastmap entries beyond `pend'. Such a pattern can match
|
|
|
- the null string, though. */
|
|
|
- if (p + j < pend) {
|
|
|
- if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
|
|
|
- RESET_FAIL_STACK();
|
|
|
- return -2;
|
|
|
- }
|
|
|
- } else
|
|
|
- bufp->can_be_null = 1;
|
|
|
+
|
|
|
+ pattern follows its end. If we can establish that there
|
|
|
+ is nothing that they would both match, i.e., that we
|
|
|
+ would have to backtrack because of (as in, e.g., `a*a')
|
|
|
+ then we can change to pop_failure_jump, because we'll
|
|
|
+ never have to backtrack.
|
|
|
|
|
|
- if (succeed_n_p) {
|
|
|
- EXTRACT_NUMBER_AND_INCR(k, p);
|
|
|
- succeed_n_p = false;
|
|
|
- }
|
|
|
+ This is not true in the case of alternatives: in
|
|
|
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
|
|
|
+ (e.g., if the string was `ab'). But instead of trying to
|
|
|
+ detect that here, the alternative has put on a dummy
|
|
|
+ failure point which is what we will end up popping. */
|
|
|
|
|
|
- continue;
|
|
|
+
|
|
|
+ If what follows this loop is a ...+ construct,
|
|
|
+ look at what begins its body, since we will have to
|
|
|
+ match at least one of that. */
|
|
|
+ while (1) {
|
|
|
+ if (p2 + 2 < pend
|
|
|
+ && ((re_opcode_t) * p2 == stop_memory
|
|
|
+ || (re_opcode_t) * p2 == start_memory))
|
|
|
+ p2 += 3;
|
|
|
+ else if (p2 + 6 < pend
|
|
|
+ && (re_opcode_t) * p2 == dummy_failure_jump)
|
|
|
+ p2 += 6;
|
|
|
+ else
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
+ p1 = p + mcnt;
|
|
|
+
|
|
|
+ to the `maybe_finalize_jump' of this case. Examine what
|
|
|
+ follows. */
|
|
|
|
|
|
- case succeed_n:
|
|
|
-
|
|
|
- p += 2;
|
|
|
+
|
|
|
+ if (p2 == pend) {
|
|
|
+
|
|
|
+ against ":/". I don't really understand this code
|
|
|
+ yet. */
|
|
|
+ p[-3] = (unsigned char) pop_failure_jump;
|
|
|
+ DEBUG_PRINT1
|
|
|
+ (" End of pattern: change to `pop_failure_jump'.\n");
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- EXTRACT_NUMBER_AND_INCR(k, p);
|
|
|
- if (k == 0) {
|
|
|
- p -= 4;
|
|
|
- succeed_n_p = true;
|
|
|
- goto handle_on_failure_jump;
|
|
|
- }
|
|
|
- continue;
|
|
|
+ else if ((re_opcode_t) * p2 == exactn
|
|
|
+ || (bufp->newline_anchor
|
|
|
+ && (re_opcode_t) * p2 == endline)) {
|
|
|
+ register unsigned char c =
|
|
|
+ *p2 == (unsigned char) endline ? '\n' : p2[2];
|
|
|
|
|
|
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) {
|
|
|
+ p[-3] = (unsigned char) pop_failure_jump;
|
|
|
+ DEBUG_PRINT3(" %c != %c => pop_failure_jump.\n",
|
|
|
+ c, p1[5]);
|
|
|
+ }
|
|
|
|
|
|
- case set_number_at:
|
|
|
- p += 4;
|
|
|
- continue;
|
|
|
+ else if ((re_opcode_t) p1[3] == charset
|
|
|
+ || (re_opcode_t) p1[3] == charset_not) {
|
|
|
+ int not = (re_opcode_t) p1[3] == charset_not;
|
|
|
|
|
|
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
|
|
|
+ && p1[5 +
|
|
|
+ c / BYTEWIDTH] & (1 << (c %
|
|
|
+ BYTEWIDTH))) not
|
|
|
+ = !not;
|
|
|
|
|
|
- case start_memory:
|
|
|
- case stop_memory:
|
|
|
- p += 2;
|
|
|
- continue;
|
|
|
+
|
|
|
+ that we can't change to pop_failure_jump. */
|
|
|
+ if (!not) {
|
|
|
+ p[-3] = (unsigned char) pop_failure_jump;
|
|
|
+ DEBUG_PRINT1
|
|
|
+ (" No match => pop_failure_jump.\n");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else if ((re_opcode_t) * p2 == charset) {
|
|
|
+
|
|
|
+ of the charset. */
|
|
|
+ if ((re_opcode_t) p1[3] == exactn
|
|
|
+ && !((int) p2[1] * BYTEWIDTH > (int) p1[5]
|
|
|
+ && (p2[2 + p1[5] / BYTEWIDTH]
|
|
|
+ & (1 << (p1[5] % BYTEWIDTH))))) {
|
|
|
+ p[-3] = (unsigned char) pop_failure_jump;
|
|
|
+ DEBUG_PRINT1(" No match => pop_failure_jump.\n");
|
|
|
+ }
|
|
|
|
|
|
+ else if ((re_opcode_t) p1[3] == charset_not) {
|
|
|
+ int idx;
|
|
|
|
|
|
- default:
|
|
|
- abort();
|
|
|
- }
|
|
|
+
|
|
|
+ lists every character listed in the charset after. */
|
|
|
+ for (idx = 0; idx < (int) p2[1]; idx++)
|
|
|
+ if (!(p2[2 + idx] == 0 || (idx < (int) p1[4]
|
|
|
+ &&
|
|
|
+ ((p2
|
|
|
+ [2 +
|
|
|
+ idx] & ~p1[5 +
|
|
|
+ idx])
|
|
|
+ == 0))))
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- characters for one path of the pattern -- and that the empty
|
|
|
- string does not match. We need not follow this path further.
|
|
|
- Instead, look at the next alternative (remembered on the
|
|
|
- stack), or quit if no more. The test at the top of the loop
|
|
|
- does these things. */
|
|
|
- path_can_be_null = false;
|
|
|
- p = pend;
|
|
|
- }
|
|
|
+ if (idx == p2[1]) {
|
|
|
+ p[-3] = (unsigned char) pop_failure_jump;
|
|
|
+ DEBUG_PRINT1
|
|
|
+ (" No match => pop_failure_jump.\n");
|
|
|
+ }
|
|
|
+ } else if ((re_opcode_t) p1[3] == charset) {
|
|
|
+ int idx;
|
|
|
|
|
|
-
|
|
|
- pattern is empty). */
|
|
|
- bufp->can_be_null |= path_can_be_null;
|
|
|
+
|
|
|
+ has no overlap with the one after the loop. */
|
|
|
+ for (idx = 0;
|
|
|
+ idx < (int) p2[1] && idx < (int) p1[4]; idx++)
|
|
|
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
|
|
|
+ break;
|
|
|
|
|
|
- done:
|
|
|
- RESET_FAIL_STACK();
|
|
|
- return 0;
|
|
|
-}
|
|
|
+ if (idx == p2[1] || idx == p1[4]) {
|
|
|
+ p[-3] = (unsigned char) pop_failure_jump;
|
|
|
+ DEBUG_PRINT1
|
|
|
+ (" No match => pop_failure_jump.\n");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ p -= 2;
|
|
|
+ if ((re_opcode_t) p[-1] != pop_failure_jump) {
|
|
|
+ p[-1] = (unsigned char) jump;
|
|
|
+ DEBUG_PRINT1(" Match => jump.\n");
|
|
|
+ goto unconditional_jump;
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__re_compile_fastmap, re_compile_fastmap)
|
|
|
-#endif
|
|
|
-
|
|
|
- ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
|
|
|
- this memory for recording register information. STARTS and ENDS
|
|
|
- must be allocated using the malloc library routine, and must each
|
|
|
- be at least NUM_REGS * sizeof (regoff_t) bytes long.
|
|
|
|
|
|
- If NUM_REGS == 0, then subsequent matches should allocate their own
|
|
|
- register data.
|
|
|
+
|
|
|
+ its matching on_failure_jump, where the latter will push a
|
|
|
+ failure point. The pop_failure_jump takes off failure
|
|
|
+ points put on by this pop_failure_jump's matching
|
|
|
+ on_failure_jump; we got through the pattern to here from the
|
|
|
+ matching on_failure_jump, so didn't fail. */
|
|
|
+ case pop_failure_jump:
|
|
|
+ {
|
|
|
+
|
|
|
+ highest registers, even though we don't care about the
|
|
|
+ actual values. Otherwise, we will restore only one
|
|
|
+ register from the stack, since lowest will == highest in
|
|
|
+ `pop_failure_point'. */
|
|
|
+ active_reg_t dummy_low_reg, dummy_high_reg;
|
|
|
+ unsigned char *pdummy;
|
|
|
+ const char *sdummy;
|
|
|
|
|
|
- Unless this function is called, the first search or match using
|
|
|
- PATTERN_BUFFER will allocate its own register data, without
|
|
|
- freeing the old data. */
|
|
|
-void re_set_registers(bufp, regs, num_regs, starts, ends)
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-struct re_registers *regs;
|
|
|
-unsigned num_regs;
|
|
|
-regoff_t *starts, *ends;
|
|
|
-{
|
|
|
- if (num_regs) {
|
|
|
- bufp->regs_allocated = REGS_REALLOCATE;
|
|
|
- regs->num_regs = num_regs;
|
|
|
- regs->start = starts;
|
|
|
- regs->end = ends;
|
|
|
- } else {
|
|
|
- bufp->regs_allocated = REGS_UNALLOCATED;
|
|
|
- regs->num_regs = 0;
|
|
|
- regs->start = regs->end = (regoff_t *) 0;
|
|
|
- }
|
|
|
-}
|
|
|
+ DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
|
|
|
+ POP_FAILURE_POINT(sdummy, pdummy,
|
|
|
+ dummy_low_reg, dummy_high_reg,
|
|
|
+ reg_dummy, reg_dummy, reg_info_dummy);
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
+ unconditional_jump:
|
|
|
#ifdef _LIBC
|
|
|
-weak_alias(__re_set_registers, re_set_registers)
|
|
|
+ DEBUG_PRINT2("\n%p: ", p);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT2("\n0x%x: ", p);
|
|
|
#endif
|
|
|
-
|
|
|
-
|
|
|
- doesn't let you say where to stop matching. */
|
|
|
-int re_search(bufp, string, size, startpos, range, regs)
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-const char *string;
|
|
|
-int size, startpos, range;
|
|
|
-struct re_registers *regs;
|
|
|
-{
|
|
|
- return re_search_2(bufp, NULL, 0, string, size, startpos, range,
|
|
|
- regs, size);
|
|
|
-}
|
|
|
+
|
|
|
|
|
|
+
|
|
|
+ case jump:
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
+ DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
|
|
|
+ p += mcnt;
|
|
|
#ifdef _LIBC
|
|
|
-weak_alias(__re_search, re_search)
|
|
|
+ DEBUG_PRINT2("(to %p).\n", p);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT2("(to 0x%x).\n", p);
|
|
|
#endif
|
|
|
-
|
|
|
- virtual concatenation of STRING1 and STRING2, starting first at index
|
|
|
- STARTPOS, then at STARTPOS + 1, and so on.
|
|
|
+ break;
|
|
|
|
|
|
- STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
|
|
|
|
|
|
- RANGE is how far to scan while trying to match. RANGE = 0 means try
|
|
|
- only at STARTPOS; in general, the last start tried is STARTPOS +
|
|
|
- RANGE.
|
|
|
+
|
|
|
+ in `group_match_null_string_p' et al. */
|
|
|
+ case jump_past_alt:
|
|
|
+ DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
|
|
|
+ goto unconditional_jump;
|
|
|
|
|
|
- In REGS, return the indices of the virtual concatenation of STRING1
|
|
|
- and STRING2 that matched the entire BUFP->buffer and its contained
|
|
|
- subexpressions.
|
|
|
|
|
|
- Do not consider matching one past the index STOP in the virtual
|
|
|
- concatenation of STRING1 and STRING2.
|
|
|
+
|
|
|
+ then gets popped at pop_failure_jump. We will end up at
|
|
|
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
|
|
|
+ are skipping over the on_failure_jump, so we have to push
|
|
|
+ something meaningless for pop_failure_jump to pop. */
|
|
|
+ case dummy_failure_jump:
|
|
|
+ DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
|
|
|
+
|
|
|
+ the code at `fail' tests is the value for the pattern. */
|
|
|
+ PUSH_FAILURE_POINT(NULL, NULL, -2);
|
|
|
+ goto unconditional_jump;
|
|
|
|
|
|
- We return either the position in the strings at which the match was
|
|
|
- found, -1 if no match, or -2 if error (such as failure
|
|
|
- stack overflow). */
|
|
|
-int
|
|
|
-re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs,
|
|
|
- stop)
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-const char *string1, *string2;
|
|
|
-int size1, size2;
|
|
|
-int startpos;
|
|
|
-int range;
|
|
|
-struct re_registers *regs;
|
|
|
-int stop;
|
|
|
-{
|
|
|
- int val;
|
|
|
- register char *fastmap = bufp->fastmap;
|
|
|
- register RE_TRANSLATE_TYPE translate = bufp->translate;
|
|
|
- int total_size = size1 + size2;
|
|
|
- int endpos = startpos + range;
|
|
|
|
|
|
-
|
|
|
- if (startpos < 0 || startpos > total_size)
|
|
|
- return -1;
|
|
|
+
|
|
|
+ point in case we are followed by a `pop_failure_jump', because
|
|
|
+ we don't want the failure point for the alternative to be
|
|
|
+ popped. For example, matching `(a|ab)*' against `aab'
|
|
|
+ requires that we match the `ab' alternative. */
|
|
|
+ case push_dummy_failure:
|
|
|
+ DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
|
|
|
+
|
|
|
+ two zeroes. */
|
|
|
+ PUSH_FAILURE_POINT(NULL, NULL, -2);
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- the virtual concatenation of STRING1 and STRING2.
|
|
|
- Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
|
|
|
- if (endpos < 0)
|
|
|
- range = 0 - startpos;
|
|
|
- else if (endpos > total_size)
|
|
|
- range = total_size - startpos;
|
|
|
+
|
|
|
+ After that, handle like `on_failure_jump'. */
|
|
|
+ case succeed_n:
|
|
|
+ EXTRACT_NUMBER(mcnt, p + 2);
|
|
|
+ DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
|
|
|
|
|
|
-
|
|
|
- search for a pattern that must be anchored. */
|
|
|
- if (bufp->used > 0 && range > 0
|
|
|
- && ((re_opcode_t) bufp->buffer[0] == begbuf
|
|
|
-
|
|
|
- || ((re_opcode_t) bufp->buffer[0] == begline
|
|
|
- && !bufp->newline_anchor))) {
|
|
|
- if (startpos > 0)
|
|
|
- return -1;
|
|
|
- else
|
|
|
- range = 1;
|
|
|
- }
|
|
|
-#ifdef emacs
|
|
|
-
|
|
|
- don't keep searching past point. */
|
|
|
- if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
|
|
|
- && range > 0) {
|
|
|
- range = PT - startpos;
|
|
|
- if (range <= 0)
|
|
|
- return -1;
|
|
|
- }
|
|
|
-#endif
|
|
|
+ assert(mcnt >= 0);
|
|
|
+
|
|
|
+ if (mcnt > 0) {
|
|
|
+ mcnt--;
|
|
|
+ p += 2;
|
|
|
+ STORE_NUMBER_AND_INCR(p, mcnt);
|
|
|
+#ifdef _LIBC
|
|
|
+ DEBUG_PRINT3(" Setting %p to %d.\n", p - 2, mcnt);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT3(" Setting 0x%x to %d.\n", p - 2, mcnt);
|
|
|
+#endif
|
|
|
+ } else if (mcnt == 0) {
|
|
|
+#ifdef _LIBC
|
|
|
+ DEBUG_PRINT2(" Setting two bytes from %p to no_op.\n",
|
|
|
+ p + 2);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT2(" Setting two bytes from 0x%x to no_op.\n",
|
|
|
+ p + 2);
|
|
|
+#endif
|
|
|
+ p[2] = (unsigned char) no_op;
|
|
|
+ p[3] = (unsigned char) no_op;
|
|
|
+ goto on_failure;
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- if (fastmap && !bufp->fastmap_accurate)
|
|
|
- if (re_compile_fastmap(bufp) == -2)
|
|
|
- return -2;
|
|
|
+ case jump_n:
|
|
|
+ EXTRACT_NUMBER(mcnt, p + 2);
|
|
|
+ DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
|
|
|
|
|
|
-
|
|
|
- for (;;) {
|
|
|
-
|
|
|
- cannot be the start of a match. If the pattern can match the
|
|
|
- null string, however, we don't need to skip characters; we want
|
|
|
- the first null string. */
|
|
|
- if (fastmap && startpos < total_size && !bufp->can_be_null) {
|
|
|
- if (range > 0) {
|
|
|
- register const char *d;
|
|
|
- register int lim = 0;
|
|
|
- int irange = range;
|
|
|
+
|
|
|
+ if (mcnt) {
|
|
|
+ mcnt--;
|
|
|
+ STORE_NUMBER(p + 2, mcnt);
|
|
|
+#ifdef _LIBC
|
|
|
+ DEBUG_PRINT3(" Setting %p to %d.\n", p + 2, mcnt);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT3(" Setting 0x%x to %d.\n", p + 2, mcnt);
|
|
|
+#endif
|
|
|
+ goto unconditional_jump;
|
|
|
+ }
|
|
|
+
|
|
|
+ else
|
|
|
+ p += 4;
|
|
|
+ break;
|
|
|
|
|
|
- if (startpos < size1 && startpos + range >= size1)
|
|
|
- lim = range - (size1 - startpos);
|
|
|
+ case set_number_at:
|
|
|
+ {
|
|
|
+ DEBUG_PRINT1("EXECUTING set_number_at.\n");
|
|
|
|
|
|
- d =
|
|
|
- (startpos >=
|
|
|
- size1 ? string2 - size1 : string1) + startpos;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
+ p1 = p + mcnt;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
+#ifdef _LIBC
|
|
|
+ DEBUG_PRINT3(" Setting %p to %d.\n", p1, mcnt);
|
|
|
+#else
|
|
|
+ DEBUG_PRINT3(" Setting 0x%x to %d.\n", p1, mcnt);
|
|
|
+#endif
|
|
|
+ STORE_NUMBER(p1, mcnt);
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- inside the loop. */
|
|
|
- if (translate)
|
|
|
- while (range > lim && !fastmap[(unsigned char)
|
|
|
- translate[
|
|
|
- (unsigned
|
|
|
- char) *d++]])
|
|
|
- range--;
|
|
|
- else
|
|
|
- while (range > lim && !fastmap[(unsigned char) *d++])
|
|
|
- range--;
|
|
|
+#if 0
|
|
|
+
|
|
|
+ test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
|
|
|
+ AT_WORD_BOUNDARY, so this code is disabled. Expanding the
|
|
|
+ macro and introducing temporary variables works around the bug. */
|
|
|
+
|
|
|
+ case wordbound:
|
|
|
+ DEBUG_PRINT1("EXECUTING wordbound.\n");
|
|
|
+ if (AT_WORD_BOUNDARY(d))
|
|
|
+ break;
|
|
|
+ goto fail;
|
|
|
|
|
|
- startpos += irange - range;
|
|
|
- } else {
|
|
|
+ case notwordbound:
|
|
|
+ DEBUG_PRINT1("EXECUTING notwordbound.\n");
|
|
|
+ if (AT_WORD_BOUNDARY(d))
|
|
|
+ goto fail;
|
|
|
+ break;
|
|
|
+#else
|
|
|
+ case wordbound:
|
|
|
+ {
|
|
|
+ boolean prevchar, thischar;
|
|
|
|
|
|
- register char c = (size1 == 0 || startpos >= size1
|
|
|
- ? string2[startpos - size1]
|
|
|
- : string1[startpos]);
|
|
|
+ DEBUG_PRINT1("EXECUTING wordbound.\n");
|
|
|
+ if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d))
|
|
|
+ break;
|
|
|
|
|
|
- if (!fastmap[(unsigned char) TRANSLATE(c)])
|
|
|
- goto advance;
|
|
|
- }
|
|
|
+ prevchar = WORDCHAR_P(d - 1);
|
|
|
+ thischar = WORDCHAR_P(d);
|
|
|
+ if (prevchar != thischar)
|
|
|
+ break;
|
|
|
+ goto fail;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
- if (range >= 0 && startpos == total_size && fastmap
|
|
|
- && !bufp->can_be_null) return -1;
|
|
|
-
|
|
|
- val = re_match_2_internal(bufp, string1, size1, string2, size2,
|
|
|
- startpos, regs, stop);
|
|
|
-#ifndef REGEX_MALLOC
|
|
|
-# ifdef C_ALLOCA
|
|
|
- alloca(0);
|
|
|
-# endif
|
|
|
-#endif
|
|
|
-
|
|
|
- if (val >= 0)
|
|
|
- return startpos;
|
|
|
+ case notwordbound:
|
|
|
+ {
|
|
|
+ boolean prevchar, thischar;
|
|
|
|
|
|
- if (val == -2)
|
|
|
- return -2;
|
|
|
+ DEBUG_PRINT1("EXECUTING notwordbound.\n");
|
|
|
+ if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d))
|
|
|
+ goto fail;
|
|
|
|
|
|
- advance:
|
|
|
- if (!range)
|
|
|
+ prevchar = WORDCHAR_P(d - 1);
|
|
|
+ thischar = WORDCHAR_P(d);
|
|
|
+ if (prevchar != thischar)
|
|
|
+ goto fail;
|
|
|
break;
|
|
|
- else if (range > 0) {
|
|
|
- range--;
|
|
|
- startpos++;
|
|
|
- } else {
|
|
|
- range++;
|
|
|
- startpos--;
|
|
|
}
|
|
|
- }
|
|
|
- return -1;
|
|
|
-}
|
|
|
-
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__re_search_2, re_search_2)
|
|
|
-#endif
|
|
|
-
|
|
|
- and `string2' into an offset from the beginning of that string. */
|
|
|
-#define POINTER_TO_OFFSET(ptr) \
|
|
|
- (FIRST_STRING_P (ptr) \
|
|
|
- ? ((regoff_t) ((ptr) - string1)) \
|
|
|
- : ((regoff_t) ((ptr) - string2 + size1)))
|
|
|
-
|
|
|
-#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
|
|
|
-
|
|
|
- string2 if necessary. */
|
|
|
-#define PREFETCH() \
|
|
|
- while (d == dend) \
|
|
|
- { \
|
|
|
- \
|
|
|
- if (dend == end_match_2) \
|
|
|
- goto fail; \
|
|
|
- \
|
|
|
- d = string2; \
|
|
|
- dend = end_match_2; \
|
|
|
- }
|
|
|
-
|
|
|
- of `string1' and `string2'. If only one string, it's `string2'. */
|
|
|
-#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
|
|
|
-#define AT_STRINGS_END(d) ((d) == end2)
|
|
|
-
|
|
|
- two special cases to check for: if past the end of string1, look at
|
|
|
- the first character in string2; and if before the beginning of
|
|
|
- string2, look at the last character in string1. */
|
|
|
-#define WORDCHAR_P(d) \
|
|
|
- (SYNTAX ((d) == end1 ? *string2 \
|
|
|
- : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
|
|
|
- == Sword)
|
|
|
-
|
|
|
-#if 0
|
|
|
-
|
|
|
- to being word-constituent. */
|
|
|
-#define AT_WORD_BOUNDARY(d) \
|
|
|
- (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
|
|
|
- || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
|
|
|
#endif
|
|
|
-
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
-# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
|
|
|
-# define FREE_VARIABLES() \
|
|
|
- do { \
|
|
|
- REGEX_FREE_STACK (fail_stack.stack); \
|
|
|
- FREE_VAR (regstart); \
|
|
|
- FREE_VAR (regend); \
|
|
|
- FREE_VAR (old_regstart); \
|
|
|
- FREE_VAR (old_regend); \
|
|
|
- FREE_VAR (best_regstart); \
|
|
|
- FREE_VAR (best_regend); \
|
|
|
- FREE_VAR (reg_info); \
|
|
|
- FREE_VAR (reg_dummy); \
|
|
|
- FREE_VAR (reg_info_dummy); \
|
|
|
- } while (0)
|
|
|
-#else
|
|
|
-# define FREE_VARIABLES() ((void)0)
|
|
|
-#endif
|
|
|
-
|
|
|
- register values; since we have a limit of 255 registers (because
|
|
|
- we use only one byte in the pattern for the register number), we can
|
|
|
- use numbers larger than 255. They must differ by 1, because of
|
|
|
- NUM_FAILURE_ITEMS above. And the value for the lowest register must
|
|
|
- be larger than the value for the highest register, so we do not try
|
|
|
- to actually save any registers when none are active. */
|
|
|
-#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
|
|
|
-#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
|
|
|
-
|
|
|
-#ifndef emacs
|
|
|
-
|
|
|
-int re_match(bufp, string, size, pos, regs)
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-const char *string;
|
|
|
-int size, pos;
|
|
|
-struct re_registers *regs;
|
|
|
-{
|
|
|
- int result = re_match_2_internal(bufp, NULL, 0, string, size,
|
|
|
- pos, regs, size);
|
|
|
|
|
|
-# ifndef REGEX_MALLOC
|
|
|
-# ifdef C_ALLOCA
|
|
|
- alloca(0);
|
|
|
-# endif
|
|
|
-# endif
|
|
|
- return result;
|
|
|
-}
|
|
|
+ case wordbeg:
|
|
|
+ DEBUG_PRINT1("EXECUTING wordbeg.\n");
|
|
|
+ if (WORDCHAR_P(d) && (AT_STRINGS_BEG(d) || !WORDCHAR_P(d - 1)))
|
|
|
+ break;
|
|
|
+ goto fail;
|
|
|
|
|
|
-# ifdef _LIBC
|
|
|
-weak_alias(__re_match, re_match)
|
|
|
-# endif
|
|
|
-#endif
|
|
|
-static boolean group_match_null_string_p _RE_ARGS((unsigned char **p,
|
|
|
- unsigned char *end,
|
|
|
- register_info_type *
|
|
|
+ case wordend:
|
|
|
+ DEBUG_PRINT1("EXECUTING wordend.\n");
|
|
|
+ if (!AT_STRINGS_BEG(d) && WORDCHAR_P(d - 1)
|
|
|
+ && (!WORDCHAR_P(d) || AT_STRINGS_END(d)))
|
|
|
+ break;
|
|
|
+ goto fail;
|
|
|
|
|
|
- reg_info));
|
|
|
-static boolean alt_match_null_string_p
|
|
|
-_RE_ARGS(
|
|
|
+#ifdef emacs
|
|
|
+ case before_dot:
|
|
|
+ DEBUG_PRINT1("EXECUTING before_dot.\n");
|
|
|
+ if (PTR_CHAR_POS((unsigned char *) d) >= point)
|
|
|
+ goto fail;
|
|
|
+ break;
|
|
|
|
|
|
- (unsigned char *p, unsigned char *end,
|
|
|
- register_info_type * reg_info));
|
|
|
-static boolean common_op_match_null_string_p
|
|
|
-_RE_ARGS(
|
|
|
+ case at_dot:
|
|
|
+ DEBUG_PRINT1("EXECUTING at_dot.\n");
|
|
|
+ if (PTR_CHAR_POS((unsigned char *) d) != point)
|
|
|
+ goto fail;
|
|
|
+ break;
|
|
|
|
|
|
- (unsigned char **p, unsigned char *end,
|
|
|
- register_info_type * reg_info));
|
|
|
-static int bcmp_translate
|
|
|
-_RE_ARGS((const char *s1, const char *s2, int len, char *translate));
|
|
|
+ case after_dot:
|
|
|
+ DEBUG_PRINT1("EXECUTING after_dot.\n");
|
|
|
+ if (PTR_CHAR_POS((unsigned char *) d) <= point)
|
|
|
+ goto fail;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
|
|
|
- and SIZE2, respectively). We start matching at POS, and stop
|
|
|
- matching at STOP.
|
|
|
+ case syntaxspec:
|
|
|
+ DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
|
|
|
+ mcnt = *p++;
|
|
|
+ goto matchsyntax;
|
|
|
|
|
|
- If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
|
|
|
- store offsets for the substring each group matched in REGS. See the
|
|
|
- documentation for exactly how many groups we fill.
|
|
|
+ case wordchar:
|
|
|
+ DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
|
|
|
+ mcnt = (int) Sword;
|
|
|
+ matchsyntax:
|
|
|
+ PREFETCH();
|
|
|
+
|
|
|
+ d++;
|
|
|
+ if (SYNTAX(d[-1]) != (enum syntaxcode) mcnt)
|
|
|
+ goto fail;
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ break;
|
|
|
|
|
|
- We return -1 if no match, -2 if an internal error (such as the
|
|
|
- failure stack overflowing). Otherwise, we return the length of the
|
|
|
- matched substring. */
|
|
|
+ case notsyntaxspec:
|
|
|
+ DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
|
|
|
+ mcnt = *p++;
|
|
|
+ goto matchnotsyntax;
|
|
|
|
|
|
-int re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop)
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-const char *string1, *string2;
|
|
|
-int size1, size2;
|
|
|
-int pos;
|
|
|
-struct re_registers *regs;
|
|
|
-int stop;
|
|
|
-{
|
|
|
- int result = re_match_2_internal(bufp, string1, size1, string2, size2,
|
|
|
- pos, regs, stop);
|
|
|
+ case notwordchar:
|
|
|
+ DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
|
|
|
+ mcnt = (int) Sword;
|
|
|
+ matchnotsyntax:
|
|
|
+ PREFETCH();
|
|
|
+
|
|
|
+ d++;
|
|
|
+ if (SYNTAX(d[-1]) == (enum syntaxcode) mcnt)
|
|
|
+ goto fail;
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ break;
|
|
|
|
|
|
-#ifndef REGEX_MALLOC
|
|
|
-# ifdef C_ALLOCA
|
|
|
- alloca(0);
|
|
|
-# endif
|
|
|
-#endif
|
|
|
- return result;
|
|
|
-}
|
|
|
+#else
|
|
|
+ case wordchar:
|
|
|
+ DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
|
|
|
+ PREFETCH();
|
|
|
+ if (!WORDCHAR_P(d))
|
|
|
+ goto fail;
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ d++;
|
|
|
+ break;
|
|
|
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__re_match_2, re_match_2)
|
|
|
-#endif
|
|
|
-
|
|
|
- afterwards. */
|
|
|
-static int
|
|
|
-re_match_2_internal(bufp, string1, size1, string2, size2, pos, regs, stop)
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-const char *string1, *string2;
|
|
|
-int size1, size2;
|
|
|
-int pos;
|
|
|
-struct re_registers *regs;
|
|
|
-int stop;
|
|
|
-{
|
|
|
-
|
|
|
- int mcnt;
|
|
|
- unsigned char *p1;
|
|
|
+ case notwordchar:
|
|
|
+ DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
|
|
|
+ PREFETCH();
|
|
|
+ if (WORDCHAR_P(d))
|
|
|
+ goto fail;
|
|
|
+ SET_REGS_MATCHED();
|
|
|
+ d++;
|
|
|
+ break;
|
|
|
+#endif
|
|
|
|
|
|
-
|
|
|
- const char *end1, *end2;
|
|
|
+ default:
|
|
|
+ abort();
|
|
|
+ }
|
|
|
+ continue;
|
|
|
|
|
|
-
|
|
|
- each to consider matching. */
|
|
|
- const char *end_match_1, *end_match_2;
|
|
|
|
|
|
-
|
|
|
- const char *d, *dend;
|
|
|
+
|
|
|
+ fail:
|
|
|
+ if (!FAIL_STACK_EMPTY()) {
|
|
|
+ DEBUG_PRINT1("\nFAIL:\n");
|
|
|
+ POP_FAILURE_POINT(d, p,
|
|
|
+ lowest_active_reg, highest_active_reg,
|
|
|
+ regstart, regend, reg_info);
|
|
|
|
|
|
-
|
|
|
- unsigned char *p = bufp->buffer;
|
|
|
- register unsigned char *pend = p + bufp->used;
|
|
|
+
|
|
|
+ if (!p)
|
|
|
+ goto fail;
|
|
|
|
|
|
-
|
|
|
- empty subpattern when we get to the stop_memory. */
|
|
|
- unsigned char *just_past_start_mem = 0;
|
|
|
+
|
|
|
+ assert(p <= pend);
|
|
|
+ if (p < pend) {
|
|
|
+ boolean is_a_jump_n = false;
|
|
|
|
|
|
-
|
|
|
- RE_TRANSLATE_TYPE translate = bufp->translate;
|
|
|
+
|
|
|
+ loop, need to pop this failure point and use the next one. */
|
|
|
+ switch ((re_opcode_t) * p) {
|
|
|
+ case jump_n:
|
|
|
+ is_a_jump_n = true;
|
|
|
+ case maybe_pop_jump:
|
|
|
+ case pop_failure_jump:
|
|
|
+ case jump:
|
|
|
+ p1 = p + 1;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ p1 += mcnt;
|
|
|
|
|
|
-
|
|
|
- down the line pushes a failure point on this stack. It consists of
|
|
|
- restart, regend, and reg_info for all registers corresponding to
|
|
|
- the subexpressions we're currently inside, plus the number of such
|
|
|
- registers, and, finally, two char *'s. The first char * is where
|
|
|
- to resume scanning the pattern; the second one is where to resume
|
|
|
- scanning the strings. If the latter is zero, the failure point is
|
|
|
- a ``dummy''; if a failure happens and the failure point is a dummy,
|
|
|
- it gets discarded and the next next one is tried. */
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
- fail_stack_type fail_stack;
|
|
|
-#endif
|
|
|
-#ifdef DEBUG
|
|
|
- static unsigned failure_id;
|
|
|
- unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
|
|
|
-#endif
|
|
|
+ if ((is_a_jump_n && (re_opcode_t) * p1 == succeed_n)
|
|
|
+ || (!is_a_jump_n
|
|
|
+ && (re_opcode_t) * p1 == on_failure_jump))
|
|
|
+ goto fail;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ ;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
-#ifdef REL_ALLOC
|
|
|
-
|
|
|
- it is allocated relocatably. */
|
|
|
- fail_stack_elt_t *failure_stack_ptr;
|
|
|
-#endif
|
|
|
+ if (d >= string1 && d <= end1)
|
|
|
+ dend = end_match_1;
|
|
|
+ } else
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- return, for use in backreferences. The number here includes
|
|
|
- an element for register zero. */
|
|
|
- size_t num_regs = bufp->re_nsub + 1;
|
|
|
+ if (best_regs_set)
|
|
|
+ goto restore_best_regs;
|
|
|
|
|
|
-
|
|
|
- active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
|
|
|
- active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
|
|
|
+ FREE_VARIABLES();
|
|
|
|
|
|
-
|
|
|
- the input strings; they record just what was matched (on this
|
|
|
- attempt) by a subexpression part of the pattern, that is, the
|
|
|
- regnum-th regstart pointer points to where in the pattern we began
|
|
|
- matching and the regnum-th regend points to right after where we
|
|
|
- stopped matching the regnum-th subexpression. (The zeroth register
|
|
|
- keeps track of what the whole pattern matches.) */
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
- const char **regstart, **regend;
|
|
|
-#endif
|
|
|
+ return -1;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
|
|
|
-
|
|
|
- match anything, then the register for its start will need to be
|
|
|
- restored because it will have been set to wherever in the string we
|
|
|
- are when we last see its open-group operator. Similarly for a
|
|
|
- register's end. */
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
- const char **old_regstart, **old_regend;
|
|
|
-#endif
|
|
|
|
|
|
-
|
|
|
- nested) subexpressions we are currently in. The matched_something
|
|
|
- field of reg_info[reg_num] helps us tell whether or not we have
|
|
|
- matched any of the pattern so far this time through the reg_num-th
|
|
|
- subexpression. These two fields get reset each time through any
|
|
|
- loop their register is in. */
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
- register_info_type *reg_info;
|
|
|
-#endif
|
|
|
+
|
|
|
|
|
|
-
|
|
|
- variables when we find a match better than any we've seen before.
|
|
|
- This happens as we backtrack through the failure points, which in
|
|
|
- turn happens only if we have not yet matched the entire string. */
|
|
|
- unsigned best_regs_set = false;
|
|
|
+ Return true if the pattern up to the corresponding stop_memory can
|
|
|
+ match the empty string, and false otherwise.
|
|
|
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
- const char **best_regstart, **best_regend;
|
|
|
-#endif
|
|
|
+ If we find the matching stop_memory, sets P to point to one past its number.
|
|
|
+ Otherwise, sets P to an undefined byte less than or equal to END.
|
|
|
|
|
|
-
|
|
|
- allocate space for that if we're not allocating space for anything
|
|
|
- else (see below). Also, we never need info about register 0 for
|
|
|
- any of the other register vectors, and it seems rather a kludge to
|
|
|
- treat `best_regend' differently than the rest. So we keep track of
|
|
|
- the end of the best match so far in a separate variable. We
|
|
|
- initialize this to NULL so that when we backtrack the first time
|
|
|
- and need to test it, it's not garbage. */
|
|
|
- const char *match_end = NULL;
|
|
|
+ We don't handle duplicates properly (yet). */
|
|
|
|
|
|
-
|
|
|
- int set_regs_matched_done = 0;
|
|
|
+static boolean group_match_null_string_p(p, end, reg_info)
|
|
|
+unsigned char **p, *end;
|
|
|
+register_info_type *reg_info;
|
|
|
+{
|
|
|
+ int mcnt;
|
|
|
|
|
|
-
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
- const char **reg_dummy;
|
|
|
- register_info_type *reg_info_dummy;
|
|
|
-#endif
|
|
|
+
|
|
|
+ unsigned char *p1 = *p + 2;
|
|
|
|
|
|
-#ifdef DEBUG
|
|
|
-
|
|
|
- unsigned num_regs_pushed = 0;
|
|
|
-#endif
|
|
|
+ while (p1 < end) {
|
|
|
+
|
|
|
+ false, as appropriate, when we get to one that can't, or to the
|
|
|
+ matching stop_memory. */
|
|
|
|
|
|
- DEBUG_PRINT1("\n\nEntering re_match_2.\n");
|
|
|
+ switch ((re_opcode_t) * p1) {
|
|
|
+
|
|
|
+ case on_failure_jump:
|
|
|
+ p1++;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
|
|
|
- INIT_FAIL_STACK();
|
|
|
+
|
|
|
+ pattern. */
|
|
|
|
|
|
-#ifdef MATCH_MAY_ALLOCATE
|
|
|
-
|
|
|
- no groups in the pattern, as it takes a fair amount of time. If
|
|
|
- there are groups, we include space for register 0 (the whole
|
|
|
- pattern), even though we never use it, since it simplifies the
|
|
|
- array indexing. We should fix this. */
|
|
|
- if (bufp->re_nsub) {
|
|
|
- regstart = REGEX_TALLOC(num_regs, const char *);
|
|
|
- regend = REGEX_TALLOC(num_regs, const char *);
|
|
|
- old_regstart = REGEX_TALLOC(num_regs, const char *);
|
|
|
- old_regend = REGEX_TALLOC(num_regs, const char *);
|
|
|
- best_regstart = REGEX_TALLOC(num_regs, const char *);
|
|
|
- best_regend = REGEX_TALLOC(num_regs, const char *);
|
|
|
+ if (mcnt >= 0) {
|
|
|
+
|
|
|
+ seeing if any of the alternatives cannot match nothing.
|
|
|
+ The last alternative starts with only a jump,
|
|
|
+ whereas the rest start with on_failure_jump and end
|
|
|
+ with a jump, e.g., here is the pattern for `a|b|c':
|
|
|
|
|
|
- reg_info = REGEX_TALLOC(num_regs, register_info_type);
|
|
|
- reg_dummy = REGEX_TALLOC(num_regs, const char *);
|
|
|
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
|
|
|
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
|
|
|
+ /exactn/1/c
|
|
|
|
|
|
- reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
|
|
|
+ So, we have to first go through the first (n-1)
|
|
|
+ alternatives and then deal with the last one separately. */
|
|
|
|
|
|
- if (!(regstart && regend && old_regstart && old_regend && reg_info
|
|
|
- && best_regstart && best_regend && reg_dummy
|
|
|
- && reg_info_dummy)) {
|
|
|
- FREE_VARIABLES();
|
|
|
- return -2;
|
|
|
- }
|
|
|
- } else {
|
|
|
-
|
|
|
- `FREE_VARIABLES' doesn't try to free them. */
|
|
|
- regstart = regend = old_regstart = old_regend = best_regstart
|
|
|
- = best_regend = reg_dummy = NULL;
|
|
|
- reg_info = reg_info_dummy = (register_info_type *) NULL;
|
|
|
- }
|
|
|
-#endif
|
|
|
|
|
|
-
|
|
|
- if (pos < 0 || pos > size1 + size2) {
|
|
|
- FREE_VARIABLES();
|
|
|
- return -1;
|
|
|
- }
|
|
|
+
|
|
|
+ with an on_failure_jump (see above) that jumps to right
|
|
|
+ past a jump_past_alt. */
|
|
|
|
|
|
-
|
|
|
- start_memory/stop_memory has been seen for. Also initialize the
|
|
|
- register information struct. */
|
|
|
- for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
|
|
|
- regstart[mcnt] = regend[mcnt]
|
|
|
- = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
|
|
|
+ while ((re_opcode_t) p1[mcnt - 3] == jump_past_alt) {
|
|
|
+
|
|
|
+ is, including the ending `jump_past_alt' and
|
|
|
+ its number. */
|
|
|
|
|
|
- REG_MATCH_NULL_STRING_P(reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
|
|
|
- IS_ACTIVE(reg_info[mcnt]) = 0;
|
|
|
- MATCHED_SOMETHING(reg_info[mcnt]) = 0;
|
|
|
- EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
|
|
|
- }
|
|
|
+ if (!alt_match_null_string_p(p1, p1 + mcnt - 3,
|
|
|
+ reg_info)) return false;
|
|
|
|
|
|
-
|
|
|
- `string1' is null. */
|
|
|
- if (size2 == 0 && string1 != NULL) {
|
|
|
- string2 = string1;
|
|
|
- size2 = size1;
|
|
|
- string1 = 0;
|
|
|
- size1 = 0;
|
|
|
- }
|
|
|
- end1 = string1 + size1;
|
|
|
- end2 = string2 + size2;
|
|
|
+
|
|
|
+ jump_past_alt. */
|
|
|
+ p1 += mcnt;
|
|
|
|
|
|
-
|
|
|
- if (stop <= size1) {
|
|
|
- end_match_1 = string1 + stop;
|
|
|
- end_match_2 = string2;
|
|
|
- } else {
|
|
|
- end_match_1 = end1;
|
|
|
- end_match_2 = string2 + stop - size1;
|
|
|
- }
|
|
|
+
|
|
|
+ that doesn't begin with an on_failure_jump. */
|
|
|
+ if ((re_opcode_t) * p1 != on_failure_jump)
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- `dend' is the end of the input string that `d' points within. `d'
|
|
|
- is advanced into the following input string whenever necessary, but
|
|
|
- this happens before fetching; therefore, at the beginning of the
|
|
|
- loop, `d' can be pointing at the end of a string, but it cannot
|
|
|
- equal `string2'. */
|
|
|
- if (size1 > 0 && pos <= size1) {
|
|
|
- d = string1 + pos;
|
|
|
- dend = end_match_1;
|
|
|
- } else {
|
|
|
- d = string2 + pos - size1;
|
|
|
- dend = end_match_2;
|
|
|
- }
|
|
|
+
|
|
|
+ alternative that starts with an on_failure_jump. */
|
|
|
+ p1++;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ if ((re_opcode_t) p1[mcnt - 3] != jump_past_alt) {
|
|
|
+
|
|
|
+ p1 -= 3;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- DEBUG_PRINT1("The compiled pattern is:\n");
|
|
|
- DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
|
|
|
- DEBUG_PRINT1("The string to match is: `");
|
|
|
- DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
|
|
|
- DEBUG_PRINT1("'\n");
|
|
|
+
|
|
|
+ of the `jump_past_alt' just before it. `mcnt' contains
|
|
|
+ the length of the alternative. */
|
|
|
+ EXTRACT_NUMBER(mcnt, p1 - 2);
|
|
|
|
|
|
-
|
|
|
- function if the match is complete, or it drops through if the match
|
|
|
- fails at this starting point in the input data. */
|
|
|
- for (;;) {
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT2("\n%p: ", p);
|
|
|
-#else
|
|
|
- DEBUG_PRINT2("\n0x%x: ", p);
|
|
|
-#endif
|
|
|
+ if (!alt_match_null_string_p(p1, p1 + mcnt, reg_info))
|
|
|
+ return false;
|
|
|
|
|
|
- if (p == pend) {
|
|
|
- DEBUG_PRINT1("end of pattern ... ");
|
|
|
+ p1 += mcnt;
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- longest match, try backtracking. */
|
|
|
- if (d != end_match_2) {
|
|
|
-
|
|
|
- as the best previous match. */
|
|
|
- boolean same_str_p = (FIRST_STRING_P(match_end)
|
|
|
- == MATCHING_IN_FIRST_STRING);
|
|
|
|
|
|
-
|
|
|
- boolean best_match_p;
|
|
|
+ case stop_memory:
|
|
|
+ assert(p1[1] == **p);
|
|
|
+ *p = p1 + 2;
|
|
|
+ return true;
|
|
|
|
|
|
-
|
|
|
- with the previous declaration. */
|
|
|
- if (same_str_p)
|
|
|
- best_match_p = d > match_end;
|
|
|
- else
|
|
|
- best_match_p = !MATCHING_IN_FIRST_STRING;
|
|
|
|
|
|
- DEBUG_PRINT1("backtracking.\n");
|
|
|
+ default:
|
|
|
+ if (!common_op_match_null_string_p(&p1, end, reg_info))
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- if (!FAIL_STACK_EMPTY()) {
|
|
|
+ return false;
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- if (!best_regs_set || best_match_p) {
|
|
|
- best_regs_set = true;
|
|
|
- match_end = d;
|
|
|
|
|
|
- DEBUG_PRINT1("\nSAVING match as best so far.\n");
|
|
|
+
|
|
|
+ It expects P to be the first byte of a single alternative and END one
|
|
|
+ byte past the last. The alternative can contain groups. */
|
|
|
|
|
|
- for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
|
|
|
- best_regstart[mcnt] = regstart[mcnt];
|
|
|
- best_regend[mcnt] = regend[mcnt];
|
|
|
- }
|
|
|
- }
|
|
|
- goto fail;
|
|
|
- }
|
|
|
+static boolean alt_match_null_string_p(p, end, reg_info)
|
|
|
+unsigned char *p, *end;
|
|
|
+register_info_type *reg_info;
|
|
|
+{
|
|
|
+ int mcnt;
|
|
|
+ unsigned char *p1 = p;
|
|
|
|
|
|
-
|
|
|
- last match is real best match, don't restore second
|
|
|
- best one. */
|
|
|
- else if (best_regs_set && !best_match_p) {
|
|
|
- restore_best_regs:
|
|
|
-
|
|
|
- end_match_1' while the restored d is in string2.
|
|
|
- For example, the pattern `x.*y.*z' against the
|
|
|
- strings `x-' and `y-z-', if the two strings are
|
|
|
- not consecutive in memory. */
|
|
|
- DEBUG_PRINT1("Restoring best registers.\n");
|
|
|
+ while (p1 < end) {
|
|
|
+
|
|
|
+ to one that can't. */
|
|
|
|
|
|
- d = match_end;
|
|
|
- dend = ((d >= string1 && d <= end1)
|
|
|
- ? end_match_1 : end_match_2);
|
|
|
+ switch ((re_opcode_t) * p1) {
|
|
|
+
|
|
|
+ case on_failure_jump:
|
|
|
+ p1++;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ p1 += mcnt;
|
|
|
+ break;
|
|
|
|
|
|
- for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
|
|
|
- regstart[mcnt] = best_regstart[mcnt];
|
|
|
- regend[mcnt] = best_regend[mcnt];
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- succeed_label:
|
|
|
- DEBUG_PRINT1("Accepting match.\n");
|
|
|
+ default:
|
|
|
+ if (!common_op_match_null_string_p(&p1, end, reg_info))
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- if (regs && !bufp->no_sub) {
|
|
|
-
|
|
|
- if (bufp->regs_allocated == REGS_UNALLOCATED) {
|
|
|
- extra element beyond `num_regs' for the `-1' marker
|
|
|
- GNU code uses. */
|
|
|
- regs->num_regs = MAX(RE_NREGS, num_regs + 1);
|
|
|
- regs->start = TALLOC(regs->num_regs, regoff_t);
|
|
|
- regs->end = TALLOC(regs->num_regs, regoff_t);
|
|
|
- if (regs->start == NULL || regs->end == NULL) {
|
|
|
- FREE_VARIABLES();
|
|
|
- return -2;
|
|
|
- }
|
|
|
- bufp->regs_allocated = REGS_REALLOCATE;
|
|
|
- } else if (bufp->regs_allocated == REGS_REALLOCATE) {
|
|
|
- allocated, reallocate them. If we need fewer, just
|
|
|
- leave it alone. */
|
|
|
- if (regs->num_regs < num_regs + 1) {
|
|
|
- regs->num_regs = num_regs + 1;
|
|
|
- RETALLOC(regs->start, regs->num_regs, regoff_t);
|
|
|
- RETALLOC(regs->end, regs->num_regs, regoff_t);
|
|
|
- if (regs->start == NULL || regs->end == NULL) {
|
|
|
- FREE_VARIABLES();
|
|
|
- return -2;
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
-
|
|
|
- warning under GCC when assert expands to nothing. */
|
|
|
- assert(bufp->regs_allocated == REGS_FIXED);
|
|
|
- }
|
|
|
+ return true;
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- indices. Register zero has to be set differently,
|
|
|
- since we haven't kept track of any info for it. */
|
|
|
- if (regs->num_regs > 0) {
|
|
|
- regs->start[0] = pos;
|
|
|
- regs->end[0] = (MATCHING_IN_FIRST_STRING
|
|
|
- ? ((regoff_t) (d - string1))
|
|
|
- : ((regoff_t) (d - string2 + size1)));
|
|
|
- }
|
|
|
|
|
|
-
|
|
|
- registers, since that is all we initialized. */
|
|
|
- for (mcnt = 1;
|
|
|
- (unsigned) mcnt < MIN(num_regs, regs->num_regs);
|
|
|
- mcnt++) {
|
|
|
- if (REG_UNSET(regstart[mcnt])
|
|
|
- || REG_UNSET(regend[mcnt])) regs->start[mcnt] =
|
|
|
- regs->end[mcnt] = -1;
|
|
|
- else {
|
|
|
- regs->start[mcnt]
|
|
|
- = (regoff_t) POINTER_TO_OFFSET(regstart[mcnt]);
|
|
|
- regs->end[mcnt]
|
|
|
- = (regoff_t) POINTER_TO_OFFSET(regend[mcnt]);
|
|
|
- }
|
|
|
- }
|
|
|
+ /* Deals with the ops common to group_match_null_string_p and
|
|
|
+ alt_match_null_string_p.
|
|
|
|
|
|
-
|
|
|
- were in the pattern, set the extra elements to -1. If
|
|
|
- we (re)allocated the registers, this is the case,
|
|
|
- because we always allocate enough to have at least one
|
|
|
- -1 at the end. */
|
|
|
- for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs;
|
|
|
- mcnt++)
|
|
|
- regs->start[mcnt] = regs->end[mcnt] = -1;
|
|
|
- }
|
|
|
-
|
|
|
- DEBUG_PRINT4
|
|
|
- ("%u failure points pushed, %u popped (%u remain).\n",
|
|
|
- nfailure_points_pushed, nfailure_points_popped,
|
|
|
- nfailure_points_pushed - nfailure_points_popped);
|
|
|
- DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
|
|
|
+ Sets P to one after the op and its arguments, if any. */
|
|
|
+
|
|
|
+static boolean common_op_match_null_string_p(p, end, reg_info)
|
|
|
+unsigned char **p, *end;
|
|
|
+register_info_type *reg_info;
|
|
|
+{
|
|
|
+ int mcnt;
|
|
|
+ boolean ret;
|
|
|
+ int reg_no;
|
|
|
+ unsigned char *p1 = *p;
|
|
|
|
|
|
- mcnt = d - pos - (MATCHING_IN_FIRST_STRING
|
|
|
- ? string1 : string2 - size1);
|
|
|
+ switch ((re_opcode_t) * p1++) {
|
|
|
+ case no_op:
|
|
|
+ case begline:
|
|
|
+ case endline:
|
|
|
+ case begbuf:
|
|
|
+ case endbuf:
|
|
|
+ case wordbeg:
|
|
|
+ case wordend:
|
|
|
+ case wordbound:
|
|
|
+ case notwordbound:
|
|
|
+#ifdef emacs
|
|
|
+ case before_dot:
|
|
|
+ case at_dot:
|
|
|
+ case after_dot:
|
|
|
+#endif
|
|
|
+ break;
|
|
|
|
|
|
- DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
|
|
|
+ case start_memory:
|
|
|
+ reg_no = *p1;
|
|
|
+ assert(reg_no > 0 && reg_no <= MAX_REGNUM);
|
|
|
+ ret = group_match_null_string_p(&p1, end, reg_info);
|
|
|
|
|
|
- FREE_VARIABLES();
|
|
|
- return mcnt;
|
|
|
- }
|
|
|
+ /* Have to set this here in case we're checking a group which
|
|
|
+ contains a group and a back reference to it. */
|
|
|
|
|
|
-
|
|
|
- switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
|
|
|
-
|
|
|
- currently have n == 0. */
|
|
|
- case no_op:
|
|
|
- DEBUG_PRINT1("EXECUTING no_op.\n");
|
|
|
- break;
|
|
|
+ if (REG_MATCH_NULL_STRING_P(reg_info[reg_no]) ==
|
|
|
+ MATCH_NULL_UNSET_VALUE)
|
|
|
+ REG_MATCH_NULL_STRING_P(reg_info[reg_no]) = ret;
|
|
|
|
|
|
- case succeed:
|
|
|
- DEBUG_PRINT1("EXECUTING succeed.\n");
|
|
|
- goto succeed_label;
|
|
|
+ if (!ret)
|
|
|
+ return false;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- byte in the pattern defines n, and the n bytes after that
|
|
|
- are the characters to match. */
|
|
|
- case exactn:
|
|
|
- mcnt = *p++;
|
|
|
- DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
|
|
|
+
|
|
|
+ case jump:
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ if (mcnt >= 0)
|
|
|
+ p1 += mcnt;
|
|
|
+ else
|
|
|
+ return false;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- testing `translate' inside the loop. */
|
|
|
- if (translate) {
|
|
|
- do {
|
|
|
- PREFETCH();
|
|
|
- if ((unsigned char) translate[(unsigned char) *d++]
|
|
|
- != (unsigned char) *p++)
|
|
|
- goto fail;
|
|
|
- }
|
|
|
- while (--mcnt);
|
|
|
- } else {
|
|
|
- do {
|
|
|
- PREFETCH();
|
|
|
- if (*d++ != (char) *p++)
|
|
|
- goto fail;
|
|
|
- }
|
|
|
- while (--mcnt);
|
|
|
- }
|
|
|
- SET_REGS_MATCHED();
|
|
|
- break;
|
|
|
+ case succeed_n:
|
|
|
+
|
|
|
+ p1 += 2;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
|
|
|
+ if (mcnt == 0) {
|
|
|
+ p1 -= 4;
|
|
|
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ p1 += mcnt;
|
|
|
+ } else
|
|
|
+ return false;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- case anychar:
|
|
|
- DEBUG_PRINT1("EXECUTING anychar.\n");
|
|
|
+ case duplicate:
|
|
|
+ if (!REG_MATCH_NULL_STRING_P(reg_info[*p1]))
|
|
|
+ return false;
|
|
|
+ break;
|
|
|
|
|
|
- PREFETCH();
|
|
|
+ case set_number_at:
|
|
|
+ p1 += 4;
|
|
|
|
|
|
- if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE(*d) == '\n')
|
|
|
- || (bufp->syntax & RE_DOT_NOT_NULL
|
|
|
- && TRANSLATE(*d) == '\000')) goto fail;
|
|
|
+ default:
|
|
|
+
|
|
|
+ return false;
|
|
|
+ }
|
|
|
|
|
|
- SET_REGS_MATCHED();
|
|
|
- DEBUG_PRINT2(" Matched `%d'.\n", *d);
|
|
|
- d++;
|
|
|
- break;
|
|
|
+ *p = p1;
|
|
|
+ return true;
|
|
|
+}
|
|
|
|
|
|
|
|
|
- case charset:
|
|
|
- case charset_not:
|
|
|
- {
|
|
|
- register unsigned char c;
|
|
|
- boolean not = (re_opcode_t) * (p - 1) == charset_not;
|
|
|
+ /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
|
|
|
+ bytes; nonzero otherwise. */
|
|
|
|
|
|
- DEBUG_PRINT2("EXECUTING charset%s.\n", not ? "_not" : "");
|
|
|
+static int bcmp_translate(s1, s2, len, translate)
|
|
|
+const char *s1, *s2;
|
|
|
+register int len;
|
|
|
+RE_TRANSLATE_TYPE translate;
|
|
|
+{
|
|
|
+ register const unsigned char *p1 = (const unsigned char *) s1;
|
|
|
+ register const unsigned char *p2 = (const unsigned char *) s2;
|
|
|
|
|
|
- PREFETCH();
|
|
|
- c = TRANSLATE(*d);
|
|
|
+ while (len) {
|
|
|
+ if (translate[*p1++] != translate[*p2++])
|
|
|
+ return 1;
|
|
|
+ len--;
|
|
|
+ }
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
|
|
|
-
|
|
|
- bit list is a full 32 bytes long. */
|
|
|
- if (c < (unsigned) (*p * BYTEWIDTH)
|
|
|
- && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
|
|
|
- not = !not;
|
|
|
+ /* re_compile_pattern is the GNU regular expression compiler: it
|
|
|
+ compiles PATTERN (of length SIZE) and puts the result in BUFP.
|
|
|
+ Returns 0 if the pattern was valid, otherwise an error string.
|
|
|
|
|
|
- p += 1 + *p;
|
|
|
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
|
|
|
+ are set in BUFP on entry.
|
|
|
|
|
|
- if (!not)
|
|
|
- goto fail;
|
|
|
+ We call regex_compile to do the actual compilation. */
|
|
|
|
|
|
- SET_REGS_MATCHED();
|
|
|
- d++;
|
|
|
- break;
|
|
|
- }
|
|
|
+const char *re_compile_pattern(pattern, length, bufp)
|
|
|
+const char *pattern;
|
|
|
+size_t length;
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+{
|
|
|
+ reg_errcode_t ret;
|
|
|
|
|
|
+ /* GNU code is written to assume at least RE_NREGS registers will be set
|
|
|
+ (and at least one extra will be -1). */
|
|
|
+ bufp->regs_allocated = REGS_UNALLOCATED;
|
|
|
|
|
|
-
|
|
|
- The arguments are the register number in the next byte, and the
|
|
|
- number of groups inner to this one in the next. The text
|
|
|
- matched within the group is recorded (in the internal
|
|
|
- registers data structure) under the register number. */
|
|
|
- case start_memory:
|
|
|
- DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p, p[1]);
|
|
|
+ /* And GNU code determines whether or not to get register information
|
|
|
+ by passing null for the REGS argument to re_match, etc., not by
|
|
|
+ setting no_sub. */
|
|
|
+ bufp->no_sub = 0;
|
|
|
|
|
|
-
|
|
|
- p1 = p;
|
|
|
+
|
|
|
+ bufp->newline_anchor = 1;
|
|
|
|
|
|
- if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
|
|
|
- MATCH_NULL_UNSET_VALUE)
|
|
|
- REG_MATCH_NULL_STRING_P(reg_info[*p]) =
|
|
|
- group_match_null_string_p(&p1, pend, reg_info);
|
|
|
+ ret = regex_compile(pattern, length, re_syntax_options, bufp);
|
|
|
|
|
|
-
|
|
|
- we were at this open-group operator in case the group is
|
|
|
- operated upon by a repetition operator, e.g., with `(a*)*b'
|
|
|
- against `ab'; then we want to ignore where we are now in
|
|
|
- the string in case this attempt to match fails. */
|
|
|
- old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
|
|
|
- ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
|
|
|
- : regstart[*p];
|
|
|
- DEBUG_PRINT2(" old_regstart: %d\n",
|
|
|
- POINTER_TO_OFFSET(old_regstart[*p]));
|
|
|
+ if (!ret)
|
|
|
+ return NULL;
|
|
|
+ return gettext(re_error_msgid + re_error_msgid_idx[(int) ret]);
|
|
|
+}
|
|
|
|
|
|
- regstart[*p] = d;
|
|
|
- DEBUG_PRINT2(" regstart: %d\n",
|
|
|
- POINTER_TO_OFFSET(regstart[*p]));
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__re_compile_pattern, re_compile_pattern)
|
|
|
+#endif
|
|
|
+ /* Entry points compatible with 4.2 BSD regex library. We don't define
|
|
|
+ them unless specifically requested. */
|
|
|
+#if defined _REGEX_RE_COMP || defined _LIBC
|
|
|
+
|
|
|
+static struct re_pattern_buffer re_comp_buf;
|
|
|
|
|
|
- IS_ACTIVE(reg_info[*p]) = 1;
|
|
|
- MATCHED_SOMETHING(reg_info[*p]) = 0;
|
|
|
+char *
|
|
|
+#ifdef _LIBC
|
|
|
+ /* Make these definitions weak in libc, so POSIX programs can redefine
|
|
|
+ these names if they don't use our functions, and still use
|
|
|
+ regcomp/regexec below without link errors. */ weak_function
|
|
|
+#endif
|
|
|
+re_comp(s)
|
|
|
+const char *s;
|
|
|
+{
|
|
|
+ reg_errcode_t ret;
|
|
|
|
|
|
-
|
|
|
- set_regs_matched_done = 0;
|
|
|
+ if (!s) {
|
|
|
+ if (!re_comp_buf.buffer)
|
|
|
+ return gettext("No previous regular expression");
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- highest_active_reg = *p;
|
|
|
+ if (!re_comp_buf.buffer) {
|
|
|
+ re_comp_buf.buffer = (unsigned char *) malloc(200);
|
|
|
+ if (re_comp_buf.buffer == NULL)
|
|
|
+ return (char *) gettext(re_error_msgid
|
|
|
+ +
|
|
|
+ re_error_msgid_idx[(int) REG_ESPACE]);
|
|
|
+ re_comp_buf.allocated = 200;
|
|
|
|
|
|
-
|
|
|
- register. */
|
|
|
- if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
|
|
|
- lowest_active_reg = *p;
|
|
|
+ re_comp_buf.fastmap = (char *) malloc(1 << BYTEWIDTH);
|
|
|
+ if (re_comp_buf.fastmap == NULL)
|
|
|
+ return (char *) gettext(re_error_msgid
|
|
|
+ +
|
|
|
+ re_error_msgid_idx[(int) REG_ESPACE]);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
|
|
|
+ don't need to initialize the pattern buffer fields which affect it. */
|
|
|
|
|
|
-
|
|
|
- p += 2;
|
|
|
- just_past_start_mem = p;
|
|
|
+
|
|
|
+ re_comp_buf.newline_anchor = 1;
|
|
|
|
|
|
- break;
|
|
|
+ ret = regex_compile(s, strlen(s), re_syntax_options, &re_comp_buf);
|
|
|
|
|
|
+ if (!ret)
|
|
|
+ return NULL;
|
|
|
|
|
|
-
|
|
|
- arguments are the same as start_memory's: the register
|
|
|
- number, and the number of inner groups. */
|
|
|
- case stop_memory:
|
|
|
- DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
|
|
|
+
|
|
|
+ return (char *) gettext(re_error_msgid +
|
|
|
+ re_error_msgid_idx[(int) ret]);
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- this close-group operator in case the group is operated
|
|
|
- upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
|
|
|
- against `aba'; then we want to ignore where we are now in
|
|
|
- the string in case this attempt to match fails. */
|
|
|
- old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
|
|
|
- ? REG_UNSET(regend[*p]) ? d : regend[*p]
|
|
|
- : regend[*p];
|
|
|
- DEBUG_PRINT2(" old_regend: %d\n",
|
|
|
- POINTER_TO_OFFSET(old_regend[*p]));
|
|
|
|
|
|
- regend[*p] = d;
|
|
|
- DEBUG_PRINT2(" regend: %d\n",
|
|
|
- POINTER_TO_OFFSET(regend[*p]));
|
|
|
+int
|
|
|
+#ifdef _LIBC
|
|
|
+ weak_function
|
|
|
+#endif
|
|
|
+re_exec(s)
|
|
|
+const char *s;
|
|
|
+{
|
|
|
+ const int len = strlen(s);
|
|
|
|
|
|
-
|
|
|
- IS_ACTIVE(reg_info[*p]) = 0;
|
|
|
+ return
|
|
|
+ 0 <= re_search(&re_comp_buf, s, len, 0, len,
|
|
|
+ (struct re_registers *) 0);
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- set_regs_matched_done = 0;
|
|
|
+#endif
|
|
|
+
|
|
|
+
|
|
|
|
|
|
-
|
|
|
- anymore. */
|
|
|
- if (lowest_active_reg == highest_active_reg) {
|
|
|
- lowest_active_reg = NO_LOWEST_ACTIVE_REG;
|
|
|
- highest_active_reg = NO_HIGHEST_ACTIVE_REG;
|
|
|
- } else {
|
|
|
- it isn't necessarily one less than now: consider
|
|
|
- (a(b)c(d(e)f)g). When group 3 ends, after the f), the
|
|
|
- new highest active register is 1. */
|
|
|
- unsigned char r = *p - 1;
|
|
|
+#ifndef emacs
|
|
|
|
|
|
- while (r > 0 && !IS_ACTIVE(reg_info[r]))
|
|
|
- r--;
|
|
|
+ /* regcomp takes a regular expression as a string and compiles it.
|
|
|
|
|
|
-
|
|
|
- the registers as the result of an `on_failure_jump', not
|
|
|
- a `start_memory', and we jumped to past the innermost
|
|
|
- `stop_memory'. For example, in ((.)*) we save
|
|
|
- registers 1 and 2 as a result of the *, but when we pop
|
|
|
- back to the second ), we are at the stop_memory 1.
|
|
|
- Thus, nothing is active. */
|
|
|
- if (r == 0) {
|
|
|
- lowest_active_reg = NO_LOWEST_ACTIVE_REG;
|
|
|
- highest_active_reg = NO_HIGHEST_ACTIVE_REG;
|
|
|
- } else
|
|
|
- highest_active_reg = r;
|
|
|
- }
|
|
|
+ PREG is a regex_t *. We do not expect any fields to be initialized,
|
|
|
+ since POSIX says we shouldn't. Thus, we set
|
|
|
|
|
|
-
|
|
|
- group that's operated on by a repetition operator, try to
|
|
|
- force exit from the ``loop'', and restore the register
|
|
|
- information for this group that we had before trying this
|
|
|
- last match. */
|
|
|
- if ((!MATCHED_SOMETHING(reg_info[*p])
|
|
|
- || just_past_start_mem == p - 1)
|
|
|
- && (p + 2) < pend) {
|
|
|
- boolean is_a_jump_n = false;
|
|
|
+ `buffer' to the compiled pattern;
|
|
|
+ `used' to the length of the compiled pattern;
|
|
|
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
|
|
|
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
|
|
|
+ RE_SYNTAX_POSIX_BASIC;
|
|
|
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
|
|
|
+ `fastmap' to an allocated space for the fastmap;
|
|
|
+ `fastmap_accurate' to zero;
|
|
|
+ `re_nsub' to the number of subexpressions in PATTERN.
|
|
|
|
|
|
- p1 = p + 2;
|
|
|
- mcnt = 0;
|
|
|
- switch ((re_opcode_t) * p1++) {
|
|
|
- case jump_n:
|
|
|
- is_a_jump_n = true;
|
|
|
- case pop_failure_jump:
|
|
|
- case maybe_pop_jump:
|
|
|
- case jump:
|
|
|
- case dummy_failure_jump:
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
- if (is_a_jump_n)
|
|
|
- p1 += 2;
|
|
|
- break;
|
|
|
+ PATTERN is the address of the pattern string.
|
|
|
|
|
|
- default:
|
|
|
- ;
|
|
|
- }
|
|
|
- p1 += mcnt;
|
|
|
+ CFLAGS is a series of bits which affect compilation.
|
|
|
|
|
|
-
|
|
|
- to an on_failure_jump right before the start_memory
|
|
|
- corresponding to this stop_memory, exit from the loop
|
|
|
- by forcing a failure after pushing on the stack the
|
|
|
- on_failure_jump's jump in the pattern, and d. */
|
|
|
- if (mcnt < 0 && (re_opcode_t) * p1 == on_failure_jump
|
|
|
- && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) {
|
|
|
-
|
|
|
- what its registers were before trying this last
|
|
|
- failed match, e.g., with `(a*)*b' against `ab' for
|
|
|
- regstart[1], and, e.g., with `((a*)*(b*)*)*'
|
|
|
- against `aba' for regend[3].
|
|
|
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
|
|
|
+ use POSIX basic syntax.
|
|
|
|
|
|
- Also restore the registers for inner groups for,
|
|
|
- e.g., `((a*)(b*))*' against `aba' (register 3 would
|
|
|
- otherwise get trashed). */
|
|
|
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
|
|
|
+ Also, regexec will try a match beginning after every newline.
|
|
|
|
|
|
- if (EVER_MATCHED_SOMETHING(reg_info[*p])) {
|
|
|
- unsigned r;
|
|
|
+ If REG_ICASE is set, then we consider upper- and lowercase
|
|
|
+ versions of letters to be equivalent when matching.
|
|
|
|
|
|
- EVER_MATCHED_SOMETHING(reg_info[*p]) = 0;
|
|
|
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
|
|
|
+ routine will report only success or failure, and nothing about the
|
|
|
+ registers.
|
|
|
|
|
|
-
|
|
|
- for (r = *p;
|
|
|
- r < (unsigned) *p + (unsigned) *(p + 1); r++) {
|
|
|
- regstart[r] = old_regstart[r];
|
|
|
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
|
|
|
+ the return codes and their meanings.) */
|
|
|
|
|
|
-
|
|
|
- if (old_regend[r] >= regstart[r])
|
|
|
- regend[r] = old_regend[r];
|
|
|
- }
|
|
|
- }
|
|
|
- p1++;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
- PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
|
|
|
+int regcomp(preg, pattern, cflags)
|
|
|
+regex_t *preg;
|
|
|
+const char *pattern;
|
|
|
+int cflags;
|
|
|
+{
|
|
|
+ reg_errcode_t ret;
|
|
|
+ reg_syntax_t syntax
|
|
|
+ = (cflags & REG_EXTENDED) ?
|
|
|
|
|
|
- goto fail;
|
|
|
- }
|
|
|
- }
|
|
|
+ RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
|
|
|
|
|
|
-
|
|
|
- p += 2;
|
|
|
- break;
|
|
|
+
|
|
|
+ preg->buffer = 0;
|
|
|
+ preg->allocated = 0;
|
|
|
+ preg->used = 0;
|
|
|
|
|
|
+
|
|
|
+ preg->fastmap = (char *) malloc(1 << BYTEWIDTH);
|
|
|
|
|
|
-
|
|
|
- followed by the numeric value of <digit> as the register number. */
|
|
|
- case duplicate:
|
|
|
- {
|
|
|
- register const char *d2, *dend2;
|
|
|
- int regno = *p++;
|
|
|
+ if (cflags & REG_ICASE) {
|
|
|
+ unsigned i;
|
|
|
|
|
|
- DEBUG_PRINT2("EXECUTING duplicate %d.\n", regno);
|
|
|
+ preg->translate
|
|
|
+ = (RE_TRANSLATE_TYPE) malloc(CHAR_SET_SIZE
|
|
|
+ * sizeof(*(RE_TRANSLATE_TYPE) 0));
|
|
|
+ if (preg->translate == NULL)
|
|
|
+ return (int) REG_ESPACE;
|
|
|
|
|
|
-
|
|
|
- if (REG_UNSET(regstart[regno]) || REG_UNSET(regend[regno]))
|
|
|
- goto fail;
|
|
|
+
|
|
|
+ for (i = 0; i < CHAR_SET_SIZE; i++)
|
|
|
+ preg->translate[i] = ISUPPER(i) ? TOLOWER(i) : i;
|
|
|
+ } else
|
|
|
+ preg->translate = NULL;
|
|
|
|
|
|
-
|
|
|
- d2 = regstart[regno];
|
|
|
+
|
|
|
+ if (cflags & REG_NEWLINE) {
|
|
|
+ syntax &= ~RE_DOT_NEWLINE;
|
|
|
+ syntax |= RE_HAT_LISTS_NOT_NEWLINE;
|
|
|
+
|
|
|
+ preg->newline_anchor = 1;
|
|
|
+ } else
|
|
|
+ preg->newline_anchor = 0;
|
|
|
|
|
|
-
|
|
|
- the place to stop matching are in the same string, then
|
|
|
- set to the place to stop, otherwise, for now have to use
|
|
|
- the end of the first string. */
|
|
|
+ preg->no_sub = !!(cflags & REG_NOSUB);
|
|
|
|
|
|
- dend2 = ((FIRST_STRING_P(regstart[regno])
|
|
|
- == FIRST_STRING_P(regend[regno]))
|
|
|
- ? regend[regno] : end_match_1);
|
|
|
- for (;;) {
|
|
|
-
|
|
|
- contents. */
|
|
|
- while (d2 == dend2) {
|
|
|
- if (dend2 == end_match_2)
|
|
|
- break;
|
|
|
- if (dend2 == regend[regno])
|
|
|
- break;
|
|
|
+ /* POSIX says a null character in the pattern terminates it, so we
|
|
|
+ can use strlen here in compiling the pattern. */
|
|
|
+ ret = regex_compile(pattern, strlen(pattern), syntax, preg);
|
|
|
|
|
|
-
|
|
|
- d2 = string2;
|
|
|
- dend2 = regend[regno];
|
|
|
- }
|
|
|
-
|
|
|
- if (d2 == dend2)
|
|
|
- break;
|
|
|
+ /* POSIX doesn't distinguish between an unmatched open-group and an
|
|
|
+ unmatched close-group: both are REG_EPAREN. */
|
|
|
+ if (ret == REG_ERPAREN)
|
|
|
+ ret = REG_EPAREN;
|
|
|
|
|
|
-
|
|
|
- PREFETCH();
|
|
|
+ if (ret == REG_NOERROR && preg->fastmap) {
|
|
|
+ /* Compute the fastmap now, since regexec cannot modify the pattern
|
|
|
+ buffer. */
|
|
|
+ if (re_compile_fastmap(preg) == -2) {
|
|
|
+ /* Some error occurred while computing the fastmap, just forget
|
|
|
+ about it. */
|
|
|
+ free(preg->fastmap);
|
|
|
+ preg->fastmap = NULL;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return (int) ret;
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- mcnt = dend - d;
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__regcomp, regcomp)
|
|
|
+#endif
|
|
|
+ /* regexec searches for a given pattern, specified by PREG, in the
|
|
|
+ string STRING.
|
|
|
|
|
|
-
|
|
|
- one shot, so, if necessary, adjust the count. */
|
|
|
- if (mcnt > dend2 - d2)
|
|
|
- mcnt = dend2 - d2;
|
|
|
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
|
|
|
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
|
|
|
+ least NMATCH elements, and we set them to the offsets of the
|
|
|
+ corresponding matched substrings.
|
|
|
|
|
|
-
|
|
|
- past them. */
|
|
|
- if (translate ? bcmp_translate(d, d2, mcnt, translate)
|
|
|
- : memcmp(d, d2, mcnt))
|
|
|
- goto fail;
|
|
|
- d += mcnt, d2 += mcnt;
|
|
|
+ EFLAGS specifies `execution flags' which affect matching: if
|
|
|
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
|
|
|
+ string; if REG_NOTEOL is set, then $ does not match at the end.
|
|
|
|
|
|
-
|
|
|
- SET_REGS_MATCHED();
|
|
|
- }
|
|
|
- }
|
|
|
- break;
|
|
|
+ We return 0 if we find a match and REG_NOMATCH if not. */
|
|
|
+int regexec(preg, string, nmatch, pmatch, eflags)
|
|
|
+const regex_t *preg;
|
|
|
+const char *string;
|
|
|
+size_t nmatch;
|
|
|
+regmatch_t pmatch[];
|
|
|
+int eflags;
|
|
|
+{
|
|
|
+ int ret;
|
|
|
+ struct re_registers regs;
|
|
|
+ regex_t private_preg;
|
|
|
+ int len = strlen(string);
|
|
|
+ boolean want_reg_info = !preg->no_sub && nmatch > 0;
|
|
|
|
|
|
+ private_preg = *preg;
|
|
|
|
|
|
-
|
|
|
- (unless `not_bol' is set in `bufp'), and, if
|
|
|
- `newline_anchor' is set, after newlines. */
|
|
|
- case begline:
|
|
|
- DEBUG_PRINT1("EXECUTING begline.\n");
|
|
|
+ private_preg.not_bol = !!(eflags & REG_NOTBOL);
|
|
|
+ private_preg.not_eol = !!(eflags & REG_NOTEOL);
|
|
|
|
|
|
- if (AT_STRINGS_BEG(d)) {
|
|
|
- if (!bufp->not_bol)
|
|
|
- break;
|
|
|
- } else if (d[-1] == '\n' && bufp->newline_anchor) {
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- goto fail;
|
|
|
+ /* The user has told us exactly how many registers to return
|
|
|
+ information about, via `nmatch'. We have to pass that on to the
|
|
|
+ matching routines. */
|
|
|
+ private_preg.regs_allocated = REGS_FIXED;
|
|
|
|
|
|
+ if (want_reg_info) {
|
|
|
+ regs.num_regs = nmatch;
|
|
|
+ regs.start = TALLOC(nmatch * 2, regoff_t);
|
|
|
+ if (regs.start == NULL)
|
|
|
+ return (int) REG_NOMATCH;
|
|
|
+ regs.end = regs.start + nmatch;
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- case endline:
|
|
|
- DEBUG_PRINT1("EXECUTING endline.\n");
|
|
|
+
|
|
|
+ ret = re_search(&private_preg, string, len,
|
|
|
+ 0, len,
|
|
|
+ want_reg_info ? &regs : (struct re_registers *) 0);
|
|
|
|
|
|
- if (AT_STRINGS_END(d)) {
|
|
|
- if (!bufp->not_eol)
|
|
|
- break;
|
|
|
- }
|
|
|
+
|
|
|
+ if (want_reg_info) {
|
|
|
+ if (ret >= 0) {
|
|
|
+ unsigned r;
|
|
|
|
|
|
-
|
|
|
- else if ((d == end1 ? *string2 : *d) == '\n'
|
|
|
- && bufp->newline_anchor) {
|
|
|
- break;
|
|
|
+ for (r = 0; r < nmatch; r++) {
|
|
|
+ pmatch[r].rm_so = regs.start[r];
|
|
|
+ pmatch[r].rm_eo = regs.end[r];
|
|
|
}
|
|
|
- goto fail;
|
|
|
-
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- case begbuf:
|
|
|
- DEBUG_PRINT1("EXECUTING begbuf.\n");
|
|
|
- if (AT_STRINGS_BEG(d))
|
|
|
- break;
|
|
|
- goto fail;
|
|
|
+
|
|
|
+ free(regs.start);
|
|
|
+ }
|
|
|
|
|
|
+
|
|
|
+ return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
- case endbuf:
|
|
|
- DEBUG_PRINT1("EXECUTING endbuf.\n");
|
|
|
- if (AT_STRINGS_END(d))
|
|
|
- break;
|
|
|
- goto fail;
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__regexec, regexec)
|
|
|
+#endif
|
|
|
+ /* Returns a message corresponding to an error code, ERRCODE, returned
|
|
|
+ from either regcomp or regexec. We don't use PREG here. */
|
|
|
+ size_t regerror(errcode, preg, errbuf, errbuf_size)
|
|
|
+int errcode;
|
|
|
+const regex_t *preg;
|
|
|
+char *errbuf;
|
|
|
+size_t errbuf_size;
|
|
|
+{
|
|
|
+ const char *msg;
|
|
|
+ size_t msg_size;
|
|
|
|
|
|
+ if (errcode < 0 || errcode >= (int) (sizeof(re_error_msgid_idx)
|
|
|
+ / sizeof(re_error_msgid_idx[0])))
|
|
|
+ /* Only error codes returned by the rest of the code should be passed
|
|
|
+ to this routine. If we are given anything else, or if other regex
|
|
|
+ code generates an invalid error code, then the program has a bug.
|
|
|
+ Dump core so we can fix it. */
|
|
|
+ abort();
|
|
|
|
|
|
-
|
|
|
- pushes NULL as the value for the string on the stack. Then
|
|
|
- `pop_failure_point' will keep the current value for the
|
|
|
- string, instead of restoring it. To see why, consider
|
|
|
- matching `foo\nbar' against `.*\n'. The .* matches the foo;
|
|
|
- then the . fails against the \n. But the next thing we want
|
|
|
- to do is match the \n against the \n; if we restored the
|
|
|
- string value, we would be back at the foo.
|
|
|
+ msg = gettext(re_error_msgid + re_error_msgid_idx[errcode]);
|
|
|
|
|
|
- Because this is used only in specific cases, we don't need to
|
|
|
- check all the things that `on_failure_jump' does, to make
|
|
|
- sure the right things get saved on the stack. Hence we don't
|
|
|
- share its code. The only reason to push anything on the
|
|
|
- stack at all is that otherwise we would have to change
|
|
|
- `anychar's code to do something besides goto fail in this
|
|
|
- case; that seems worse than this. */
|
|
|
- case on_failure_keep_string_jump:
|
|
|
- DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
|
|
|
+ msg_size = strlen(msg) + 1;
|
|
|
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT3(" %d (to %p):\n", mcnt, p + mcnt);
|
|
|
+ if (errbuf_size != 0) {
|
|
|
+ if (msg_size > errbuf_size) {
|
|
|
+#if defined HAVE_MEMPCPY || defined _LIBC
|
|
|
+ *((char *) __mempcpy(errbuf, msg, errbuf_size - 1)) = '\0';
|
|
|
#else
|
|
|
- DEBUG_PRINT3(" %d (to 0x%x):\n", mcnt, p + mcnt);
|
|
|
+ memcpy(errbuf, msg, errbuf_size - 1);
|
|
|
+ errbuf[errbuf_size - 1] = 0;
|
|
|
#endif
|
|
|
+ } else
|
|
|
+ memcpy(errbuf, msg, msg_size);
|
|
|
+ }
|
|
|
|
|
|
- PUSH_FAILURE_POINT(p + mcnt, NULL, -2);
|
|
|
- break;
|
|
|
+ return msg_size;
|
|
|
+}
|
|
|
|
|
|
+#ifdef _LIBC
|
|
|
+weak_alias(__regerror, regerror)
|
|
|
+#endif
|
|
|
+
|
|
|
+void regfree(preg)
|
|
|
+regex_t *preg;
|
|
|
+{
|
|
|
+ if (preg->buffer != NULL)
|
|
|
+ free(preg->buffer);
|
|
|
+ preg->buffer = NULL;
|
|
|
|
|
|
-
|
|
|
+ preg->allocated = 0;
|
|
|
+ preg->used = 0;
|
|
|
|
|
|
- Each alternative starts with an on_failure_jump that points
|
|
|
- to the beginning of the next alternative. Each alternative
|
|
|
- except the last ends with a jump that in effect jumps past
|
|
|
- the rest of the alternatives. (They really jump to the
|
|
|
- ending jump of the following alternative, because tensioning
|
|
|
- these jumps is a hassle.)
|
|
|
+ if (preg->fastmap != NULL)
|
|
|
+ free(preg->fastmap);
|
|
|
+ preg->fastmap = NULL;
|
|
|
+ preg->fastmap_accurate = 0;
|
|
|
|
|
|
- Repeats start with an on_failure_jump that points past both
|
|
|
- the repetition text and either the following jump or
|
|
|
- pop_failure_jump back to this on_failure_jump. */
|
|
|
- case on_failure_jump:
|
|
|
- on_failure:
|
|
|
- DEBUG_PRINT1("EXECUTING on_failure_jump");
|
|
|
+ if (preg->translate != NULL)
|
|
|
+ free(preg->translate);
|
|
|
+ preg->translate = NULL;
|
|
|
+}
|
|
|
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
#ifdef _LIBC
|
|
|
- DEBUG_PRINT3(" %d (to %p)", mcnt, p + mcnt);
|
|
|
-#else
|
|
|
- DEBUG_PRINT3(" %d (to 0x%x)", mcnt, p + mcnt);
|
|
|
+weak_alias(__regfree, regfree)
|
|
|
#endif
|
|
|
+#endif
|
|
|
+
|
|
|
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
|
|
|
+ Returns one of error codes defined in `regex.h', or zero for success.
|
|
|
|
|
|
-
|
|
|
- the original * applied to a group), save the information
|
|
|
- for that group and all inner ones, so that if we fail back
|
|
|
- to this point, the group's information will be correct.
|
|
|
- For example, in \(a*\)*\1, we need the preceding group,
|
|
|
- and in \(zz\(a*\)b*\)\2, we need the inner group. */
|
|
|
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
|
|
|
+ fields are set in BUFP on entry.
|
|
|
|
|
|
-
|
|
|
- a failure point to `p + mcnt' after we do this. */
|
|
|
- p1 = p;
|
|
|
+ If it succeeds, results are put in BUFP (if it returns an error, the
|
|
|
+ contents of BUFP are undefined):
|
|
|
+ `buffer' is the compiled pattern;
|
|
|
+ `syntax' is set to SYNTAX;
|
|
|
+ `used' is set to the length of the compiled pattern;
|
|
|
+ `fastmap_accurate' is zero;
|
|
|
+ `re_nsub' is the number of subexpressions in PATTERN;
|
|
|
+ `not_bol' and `not_eol' are zero;
|
|
|
|
|
|
-
|
|
|
- start_memory in case this on_failure_jump is happening as
|
|
|
- the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
|
|
|
- against aba. */
|
|
|
- while (p1 < pend && (re_opcode_t) * p1 == no_op)
|
|
|
- p1++;
|
|
|
+ The `fastmap' and `newline_anchor' fields are neither
|
|
|
+ examined nor set. */
|
|
|
|
|
|
- if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
|
|
|
-
|
|
|
- get reset at the start_memory we are about to get to,
|
|
|
- but we will have saved all the registers relevant to
|
|
|
- this repetition op, as described above. */
|
|
|
- highest_active_reg = *(p1 + 1) + *(p1 + 2);
|
|
|
- if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
|
|
|
- lowest_active_reg = *(p1 + 1);
|
|
|
- }
|
|
|
+/* Return, freeing storage we allocated.  */
|
|
|
+#define FREE_STACK_RETURN(value) \
|
|
|
+ return (free (compile_stack.stack), value)
|
|
|
|
|
|
- DEBUG_PRINT1(":\n");
|
|
|
- PUSH_FAILURE_POINT(p + mcnt, d, -2);
|
|
|
- break;
|
|
|
+static reg_errcode_t regex_compile(pattern, size, syntax, bufp)
|
|
|
+const char *pattern;
|
|
|
+size_t size;
|
|
|
+reg_syntax_t syntax;
|
|
|
+struct re_pattern_buffer *bufp;
|
|
|
+{
|
|
|
+
|
|
|
+ `char *' (i.e., signed), we declare these variables as unsigned, so
|
|
|
+ they can be reliably used as array indices. */
|
|
|
+ register unsigned char c, c1;
|
|
|
|
|
|
+
|
|
|
+ const char *p1;
|
|
|
|
|
|
-
|
|
|
- We change it to either `pop_failure_jump' or `jump'. */
|
|
|
- case maybe_pop_jump:
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
- DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
|
|
|
- {
|
|
|
- register unsigned char *p2 = p;
|
|
|
+
|
|
|
+ register unsigned char *b;
|
|
|
|
|
|
-
|
|
|
- pattern follows its end. If we can establish that there
|
|
|
- is nothing that they would both match, i.e., that we
|
|
|
- would have to backtrack because of (as in, e.g., `a*a')
|
|
|
- then we can change to pop_failure_jump, because we'll
|
|
|
- never have to backtrack.
|
|
|
+
|
|
|
+ compile_stack_type compile_stack;
|
|
|
|
|
|
- This is not true in the case of alternatives: in
|
|
|
- `(a|ab)*' we do need to backtrack to the `ab' alternative
|
|
|
- (e.g., if the string was `ab'). But instead of trying to
|
|
|
- detect that here, the alternative has put on a dummy
|
|
|
- failure point which is what we will end up popping. */
|
|
|
+
|
|
|
+ const char *p = pattern;
|
|
|
+ const char *pend = pattern + size;
|
|
|
|
|
|
-
|
|
|
- If what follows this loop is a ...+ construct,
|
|
|
- look at what begins its body, since we will have to
|
|
|
- match at least one of that. */
|
|
|
- while (1) {
|
|
|
- if (p2 + 2 < pend
|
|
|
- && ((re_opcode_t) * p2 == stop_memory
|
|
|
- || (re_opcode_t) * p2 == start_memory))
|
|
|
- p2 += 3;
|
|
|
- else if (p2 + 6 < pend
|
|
|
- && (re_opcode_t) * p2 == dummy_failure_jump)
|
|
|
- p2 += 6;
|
|
|
- else
|
|
|
- break;
|
|
|
- }
|
|
|
+
|
|
|
+ RE_TRANSLATE_TYPE translate = bufp->translate;
|
|
|
|
|
|
- p1 = p + mcnt;
|
|
|
-
|
|
|
- to the `maybe_finalize_jump' of this case. Examine what
|
|
|
- follows. */
|
|
|
+
|
|
|
+ command. This makes it possible to tell if a new exact-match
|
|
|
+ character can be added to that command or if the character requires
|
|
|
+ a new `exactn' command. */
|
|
|
+ unsigned char *pending_exact = 0;
|
|
|
|
|
|
-
|
|
|
- if (p2 == pend) {
|
|
|
-
|
|
|
- against ":/". I don't really understand this code
|
|
|
- yet. */
|
|
|
- p[-3] = (unsigned char) pop_failure_jump;
|
|
|
- DEBUG_PRINT1
|
|
|
- (" End of pattern: change to `pop_failure_jump'.\n");
|
|
|
- }
|
|
|
+
|
|
|
+ This tells, e.g., postfix * where to find the start of its
|
|
|
+ operand. Reset at the beginning of groups and alternatives. */
|
|
|
+ unsigned char *laststart = 0;
|
|
|
|
|
|
- else if ((re_opcode_t) * p2 == exactn
|
|
|
- || (bufp->newline_anchor
|
|
|
- && (re_opcode_t) * p2 == endline)) {
|
|
|
- register unsigned char c =
|
|
|
- *p2 == (unsigned char) endline ? '\n' : p2[2];
|
|
|
+
|
|
|
+ unsigned char *begalt;
|
|
|
|
|
|
- if ((re_opcode_t) p1[3] == exactn && p1[5] != c) {
|
|
|
- p[-3] = (unsigned char) pop_failure_jump;
|
|
|
- DEBUG_PRINT3(" %c != %c => pop_failure_jump.\n",
|
|
|
- c, p1[5]);
|
|
|
- }
|
|
|
+
|
|
|
+ which to go back if the interval is invalid. */
|
|
|
+ const char *beg_interval;
|
|
|
|
|
|
- else if ((re_opcode_t) p1[3] == charset
|
|
|
- || (re_opcode_t) p1[3] == charset_not) {
|
|
|
- int not = (re_opcode_t) p1[3] == charset_not;
|
|
|
+
|
|
|
+ the containing expression. Each alternative of an `or' -- except the
|
|
|
+ last -- ends with a forward jump of this sort. */
|
|
|
+ unsigned char *fixup_alt_jump = 0;
|
|
|
|
|
|
- if (c < (unsigned char) (p1[4] * BYTEWIDTH)
|
|
|
- && p1[5 +
|
|
|
- c / BYTEWIDTH] & (1 << (c %
|
|
|
- BYTEWIDTH))) not
|
|
|
- = !not;
|
|
|
+
|
|
|
+ matching close-group on the compile stack, so the same register
|
|
|
+ number is put in the stop_memory as the start_memory. */
|
|
|
+ regnum_t regnum = 0;
|
|
|
|
|
|
-
|
|
|
- that we can't change to pop_failure_jump. */
|
|
|
- if (!not) {
|
|
|
- p[-3] = (unsigned char) pop_failure_jump;
|
|
|
- DEBUG_PRINT1
|
|
|
- (" No match => pop_failure_jump.\n");
|
|
|
- }
|
|
|
- }
|
|
|
- } else if ((re_opcode_t) * p2 == charset) {
|
|
|
-
|
|
|
- of the charset. */
|
|
|
- if ((re_opcode_t) p1[3] == exactn
|
|
|
- && !((int) p2[1] * BYTEWIDTH > (int) p1[5]
|
|
|
- && (p2[2 + p1[5] / BYTEWIDTH]
|
|
|
- & (1 << (p1[5] % BYTEWIDTH))))) {
|
|
|
- p[-3] = (unsigned char) pop_failure_jump;
|
|
|
- DEBUG_PRINT1(" No match => pop_failure_jump.\n");
|
|
|
- }
|
|
|
+#ifdef DEBUG
|
|
|
+ DEBUG_PRINT1("\nCompiling pattern: ");
|
|
|
+ if (debug) {
|
|
|
+ unsigned debug_count;
|
|
|
|
|
|
- else if ((re_opcode_t) p1[3] == charset_not) {
|
|
|
- int idx;
|
|
|
+ for (debug_count = 0; debug_count < size; debug_count++)
|
|
|
+ putchar(pattern[debug_count]);
|
|
|
+ putchar('\n');
|
|
|
+ }
|
|
|
+#endif
|
|
|
|
|
|
-
|
|
|
- lists every character listed in the charset after. */
|
|
|
- for (idx = 0; idx < (int) p2[1]; idx++)
|
|
|
- if (!(p2[2 + idx] == 0 || (idx < (int) p1[4]
|
|
|
- &&
|
|
|
- ((p2
|
|
|
- [2 +
|
|
|
- idx] & ~p1[5 +
|
|
|
- idx])
|
|
|
- == 0))))
|
|
|
- break;
|
|
|
+
|
|
|
+ compile_stack.stack =
|
|
|
+ TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
|
|
|
+ if (compile_stack.stack == NULL)
|
|
|
+ return REG_ESPACE;
|
|
|
|
|
|
- if (idx == p2[1]) {
|
|
|
- p[-3] = (unsigned char) pop_failure_jump;
|
|
|
- DEBUG_PRINT1
|
|
|
- (" No match => pop_failure_jump.\n");
|
|
|
- }
|
|
|
- } else if ((re_opcode_t) p1[3] == charset) {
|
|
|
- int idx;
|
|
|
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
|
|
|
+ compile_stack.avail = 0;
|
|
|
|
|
|
-
|
|
|
- has no overlap with the one after the loop. */
|
|
|
- for (idx = 0;
|
|
|
- idx < (int) p2[1] && idx < (int) p1[4]; idx++)
|
|
|
- if ((p2[2 + idx] & p1[5 + idx]) != 0)
|
|
|
- break;
|
|
|
+
|
|
|
+ bufp->syntax = syntax;
|
|
|
+ bufp->fastmap_accurate = 0;
|
|
|
+ bufp->not_bol = bufp->not_eol = 0;
|
|
|
|
|
|
- if (idx == p2[1] || idx == p1[4]) {
|
|
|
- p[-3] = (unsigned char) pop_failure_jump;
|
|
|
- DEBUG_PRINT1
|
|
|
- (" No match => pop_failure_jump.\n");
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- p -= 2;
|
|
|
- if ((re_opcode_t) p[-1] != pop_failure_jump) {
|
|
|
- p[-1] = (unsigned char) jump;
|
|
|
- DEBUG_PRINT1(" Match => jump.\n");
|
|
|
- goto unconditional_jump;
|
|
|
- }
|
|
|
-
|
|
|
+
|
|
|
+ printer (for debugging) will think there's no pattern. We reset it
|
|
|
+ at the end. */
|
|
|
+ bufp->used = 0;
|
|
|
|
|
|
+
|
|
|
+ bufp->re_nsub = 0;
|
|
|
|
|
|
-
|
|
|
- its matching on_failure_jump, where the latter will push a
|
|
|
- failure point. The pop_failure_jump takes off failure
|
|
|
- points put on by this pop_failure_jump's matching
|
|
|
- on_failure_jump; we got through the pattern to here from the
|
|
|
- matching on_failure_jump, so didn't fail. */
|
|
|
- case pop_failure_jump:
|
|
|
- {
|
|
|
-
|
|
|
- highest registers, even though we don't care about the
|
|
|
- actual values. Otherwise, we will restore only one
|
|
|
- register from the stack, since lowest will == highest in
|
|
|
- `pop_failure_point'. */
|
|
|
- active_reg_t dummy_low_reg, dummy_high_reg;
|
|
|
- unsigned char *pdummy;
|
|
|
- const char *sdummy;
|
|
|
+#if !defined emacs && !defined SYNTAX_TABLE
|
|
|
+
|
|
|
+ init_syntax_once();
|
|
|
+#endif
|
|
|
|
|
|
- DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
|
|
|
- POP_FAILURE_POINT(sdummy, pdummy,
|
|
|
- dummy_low_reg, dummy_high_reg,
|
|
|
- reg_dummy, reg_dummy, reg_info_dummy);
|
|
|
+ if (bufp->allocated == 0) {
|
|
|
+ if (bufp->buffer) {
|
|
|
+ enough space. This loses if buffer's address is bogus, but
|
|
|
+ that is the user's responsibility. */
|
|
|
+ RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
|
|
|
+ } else {
|
|
|
+ bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
|
|
|
}
|
|
|
-
|
|
|
+ if (!bufp->buffer)
|
|
|
+ FREE_STACK_RETURN(REG_ESPACE);
|
|
|
|
|
|
- unconditional_jump:
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT2("\n%p: ", p);
|
|
|
-#else
|
|
|
- DEBUG_PRINT2("\n0x%x: ", p);
|
|
|
-#endif
|
|
|
-
|
|
|
+ bufp->allocated = INIT_BUF_SIZE;
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- case jump:
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
- DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
|
|
|
- p += mcnt;
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT2("(to %p).\n", p);
|
|
|
-#else
|
|
|
- DEBUG_PRINT2("(to 0x%x).\n", p);
|
|
|
-#endif
|
|
|
+ begalt = b = bufp->buffer;
|
|
|
+
|
|
|
+
|
|
|
+ while (p != pend) {
|
|
|
+ PATFETCH(c);
|
|
|
+
|
|
|
+ switch (c) {
|
|
|
+ case '^':
|
|
|
+ {
|
|
|
+ if (
|
|
|
+ p == pattern + 1
|
|
|
+
|
|
|
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
|
|
|
+
|
|
|
+ || at_begline_loc_p(pattern, p, syntax))
|
|
|
+ BUF_PUSH(begline);
|
|
|
+ else
|
|
|
+ goto normal_char;
|
|
|
+ }
|
|
|
break;
|
|
|
|
|
|
|
|
|
-
|
|
|
- in `group_match_null_string_p' et al. */
|
|
|
- case jump_past_alt:
|
|
|
- DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
|
|
|
- goto unconditional_jump;
|
|
|
+ case '$':
|
|
|
+ {
|
|
|
+ if (
|
|
|
+ p == pend
|
|
|
+
|
|
|
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
|
|
|
+
|
|
|
+ || at_endline_loc_p(p, pend, syntax))
|
|
|
+ BUF_PUSH(endline);
|
|
|
+ else
|
|
|
+ goto normal_char;
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
|
|
|
-
|
|
|
- then gets popped at pop_failure_jump. We will end up at
|
|
|
- pop_failure_jump, also, and with a pattern of, say, `a+', we
|
|
|
- are skipping over the on_failure_jump, so we have to push
|
|
|
- something meaningless for pop_failure_jump to pop. */
|
|
|
- case dummy_failure_jump:
|
|
|
- DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
|
|
|
-
|
|
|
- the code at `fail' tests is the value for the pattern. */
|
|
|
- PUSH_FAILURE_POINT(NULL, NULL, -2);
|
|
|
- goto unconditional_jump;
|
|
|
+ case '+':
|
|
|
+ case '?':
|
|
|
+ if ((syntax & RE_BK_PLUS_QM)
|
|
|
+ || (syntax & RE_LIMITED_OPS))
|
|
|
+ goto normal_char;
|
|
|
+ handle_plus:
|
|
|
+ case '*':
|
|
|
+
|
|
|
+ if (!laststart) {
|
|
|
+ if (syntax & RE_CONTEXT_INVALID_OPS)
|
|
|
+ FREE_STACK_RETURN(REG_BADRPT);
|
|
|
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
|
|
|
+ goto normal_char;
|
|
|
+ }
|
|
|
|
|
|
+ {
|
|
|
+
|
|
|
+ boolean keep_string_p = false;
|
|
|
|
|
|
-
|
|
|
- point in case we are followed by a `pop_failure_jump', because
|
|
|
- we don't want the failure point for the alternative to be
|
|
|
- popped. For example, matching `(a|ab)*' against `aab'
|
|
|
- requires that we match the `ab' alternative. */
|
|
|
- case push_dummy_failure:
|
|
|
- DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
|
|
|
-
|
|
|
- two zeroes. */
|
|
|
- PUSH_FAILURE_POINT(NULL, NULL, -2);
|
|
|
- break;
|
|
|
+
|
|
|
+ char zero_times_ok = 0, many_times_ok = 0;
|
|
|
|
|
|
-
|
|
|
- After that, handle like `on_failure_jump'. */
|
|
|
- case succeed_n:
|
|
|
- EXTRACT_NUMBER(mcnt, p + 2);
|
|
|
- DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
|
|
|
+
|
|
|
+ down to just one (the right one). We can't combine
|
|
|
+ interval operators with these because of, e.g., `a{2}*',
|
|
|
+ which should only match an even number of `a's. */
|
|
|
|
|
|
- assert(mcnt >= 0);
|
|
|
-
|
|
|
- if (mcnt > 0) {
|
|
|
- mcnt--;
|
|
|
- p += 2;
|
|
|
- STORE_NUMBER_AND_INCR(p, mcnt);
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT3(" Setting %p to %d.\n", p - 2, mcnt);
|
|
|
-#else
|
|
|
- DEBUG_PRINT3(" Setting 0x%x to %d.\n", p - 2, mcnt);
|
|
|
-#endif
|
|
|
- } else if (mcnt == 0) {
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT2(" Setting two bytes from %p to no_op.\n",
|
|
|
- p + 2);
|
|
|
-#else
|
|
|
- DEBUG_PRINT2(" Setting two bytes from 0x%x to no_op.\n",
|
|
|
- p + 2);
|
|
|
-#endif
|
|
|
- p[2] = (unsigned char) no_op;
|
|
|
- p[3] = (unsigned char) no_op;
|
|
|
- goto on_failure;
|
|
|
- }
|
|
|
- break;
|
|
|
+ for (;;) {
|
|
|
+ zero_times_ok |= c != '+';
|
|
|
+ many_times_ok |= c != '?';
|
|
|
|
|
|
- case jump_n:
|
|
|
- EXTRACT_NUMBER(mcnt, p + 2);
|
|
|
- DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
|
|
|
+ if (p == pend)
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- if (mcnt) {
|
|
|
- mcnt--;
|
|
|
- STORE_NUMBER(p + 2, mcnt);
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT3(" Setting %p to %d.\n", p + 2, mcnt);
|
|
|
-#else
|
|
|
- DEBUG_PRINT3(" Setting 0x%x to %d.\n", p + 2, mcnt);
|
|
|
-#endif
|
|
|
- goto unconditional_jump;
|
|
|
- }
|
|
|
-
|
|
|
- else
|
|
|
- p += 4;
|
|
|
- break;
|
|
|
+ PATFETCH(c);
|
|
|
|
|
|
- case set_number_at:
|
|
|
- {
|
|
|
- DEBUG_PRINT1("EXECUTING set_number_at.\n");
|
|
|
+ if (c == '*'
|
|
|
+ || (!(syntax & RE_BK_PLUS_QM)
|
|
|
+ && (c == '+' || c == '?')));
|
|
|
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
- p1 = p + mcnt;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
-#ifdef _LIBC
|
|
|
- DEBUG_PRINT3(" Setting %p to %d.\n", p1, mcnt);
|
|
|
-#else
|
|
|
- DEBUG_PRINT3(" Setting 0x%x to %d.\n", p1, mcnt);
|
|
|
-#endif
|
|
|
- STORE_NUMBER(p1, mcnt);
|
|
|
- break;
|
|
|
- }
|
|
|
+ else if (syntax & RE_BK_PLUS_QM && c == '\\') {
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EESCAPE);
|
|
|
|
|
|
-#if 0
|
|
|
-
|
|
|
- test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
|
|
|
- AT_WORD_BOUNDARY, so this code is disabled. Expanding the
|
|
|
- macro and introducing temporary variables works around the bug. */
|
|
|
+ PATFETCH(c1);
|
|
|
+ if (!(c1 == '+' || c1 == '?')) {
|
|
|
+ PATUNFETCH;
|
|
|
+ PATUNFETCH;
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
- case wordbound:
|
|
|
- DEBUG_PRINT1("EXECUTING wordbound.\n");
|
|
|
- if (AT_WORD_BOUNDARY(d))
|
|
|
- break;
|
|
|
- goto fail;
|
|
|
+ c = c1;
|
|
|
+ } else {
|
|
|
+ PATUNFETCH;
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
- case notwordbound:
|
|
|
- DEBUG_PRINT1("EXECUTING notwordbound.\n");
|
|
|
- if (AT_WORD_BOUNDARY(d))
|
|
|
- goto fail;
|
|
|
- break;
|
|
|
-#else
|
|
|
- case wordbound:
|
|
|
- {
|
|
|
- boolean prevchar, thischar;
|
|
|
+
|
|
|
+ }
|
|
|
|
|
|
- DEBUG_PRINT1("EXECUTING wordbound.\n");
|
|
|
- if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d))
|
|
|
- break;
|
|
|
+
|
|
|
+ to an empty pattern. */
|
|
|
+ if (!laststart)
|
|
|
+ break;
|
|
|
|
|
|
- prevchar = WORDCHAR_P(d - 1);
|
|
|
- thischar = WORDCHAR_P(d);
|
|
|
- if (prevchar != thischar)
|
|
|
- break;
|
|
|
- goto fail;
|
|
|
- }
|
|
|
+
|
|
|
+ and also whether or not two or more matches is allowed. */
|
|
|
+ if (many_times_ok) {
|
|
|
+ end a backward relative jump from `b' to before the next
|
|
|
+ jump we're going to put in below (which jumps from
|
|
|
+ laststart to after this jump).
|
|
|
|
|
|
- case notwordbound:
|
|
|
- {
|
|
|
- boolean prevchar, thischar;
|
|
|
+ But if we are at the `*' in the exact sequence `.*\n',
|
|
|
+ insert an unconditional jump backwards to the .,
|
|
|
+ instead of the beginning of the loop. This way we only
|
|
|
+ push a failure point once, instead of every time
|
|
|
+ through the loop. */
|
|
|
+ assert(p - 1 > pattern);
|
|
|
|
|
|
- DEBUG_PRINT1("EXECUTING notwordbound.\n");
|
|
|
- if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d))
|
|
|
- goto fail;
|
|
|
+
|
|
|
+ GET_BUFFER_SPACE(3);
|
|
|
|
|
|
- prevchar = WORDCHAR_P(d - 1);
|
|
|
- thischar = WORDCHAR_P(d);
|
|
|
- if (prevchar != thischar)
|
|
|
- goto fail;
|
|
|
- break;
|
|
|
- }
|
|
|
-#endif
|
|
|
+
|
|
|
+ because laststart was nonzero. And we've already
|
|
|
+ incremented `p', by the way, to be the character after
|
|
|
+ the `*'. Do we have to do something analogous here
|
|
|
+ for null bytes, because of RE_DOT_NOT_NULL? */
|
|
|
+ if (TRANSLATE(*(p - 2)) == TRANSLATE('.')
|
|
|
+ && zero_times_ok
|
|
|
+ && p < pend && TRANSLATE(*p) == TRANSLATE('\n')
|
|
|
+ && !(syntax & RE_DOT_NEWLINE)) {
|
|
|
+ STORE_JUMP(jump, b, laststart);
|
|
|
+ keep_string_p = true;
|
|
|
+ } else
|
|
|
+
|
|
|
+ STORE_JUMP(maybe_pop_jump, b, laststart - 3);
|
|
|
|
|
|
- case wordbeg:
|
|
|
- DEBUG_PRINT1("EXECUTING wordbeg.\n");
|
|
|
- if (WORDCHAR_P(d) && (AT_STRINGS_BEG(d) || !WORDCHAR_P(d - 1)))
|
|
|
- break;
|
|
|
- goto fail;
|
|
|
+
|
|
|
+ b += 3;
|
|
|
+ }
|
|
|
|
|
|
- case wordend:
|
|
|
- DEBUG_PRINT1("EXECUTING wordend.\n");
|
|
|
- if (!AT_STRINGS_BEG(d) && WORDCHAR_P(d - 1)
|
|
|
- && (!WORDCHAR_P(d) || AT_STRINGS_END(d)))
|
|
|
- break;
|
|
|
- goto fail;
|
|
|
+
|
|
|
+ end of the buffer after this jump is inserted. */
|
|
|
+ GET_BUFFER_SPACE(3);
|
|
|
+ INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump
|
|
|
+ : on_failure_jump, laststart, b + 3);
|
|
|
+ pending_exact = 0;
|
|
|
+ b += 3;
|
|
|
|
|
|
-#ifdef emacs
|
|
|
- case before_dot:
|
|
|
- DEBUG_PRINT1("EXECUTING before_dot.\n");
|
|
|
- if (PTR_CHAR_POS((unsigned char *) d) >= point)
|
|
|
- goto fail;
|
|
|
+ if (!zero_times_ok) {
|
|
|
+
|
|
|
+ `dummy_failure_jump' before the initial
|
|
|
+ `on_failure_jump' instruction of the loop. This
|
|
|
+ effects a skip over that instruction the first time
|
|
|
+ we hit that loop. */
|
|
|
+ GET_BUFFER_SPACE(3);
|
|
|
+ INSERT_JUMP(dummy_failure_jump, laststart,
|
|
|
+ laststart + 6);
|
|
|
+ b += 3;
|
|
|
+ }
|
|
|
+ }
|
|
|
break;
|
|
|
|
|
|
- case at_dot:
|
|
|
- DEBUG_PRINT1("EXECUTING at_dot.\n");
|
|
|
- if (PTR_CHAR_POS((unsigned char *) d) != point)
|
|
|
- goto fail;
|
|
|
- break;
|
|
|
|
|
|
- case after_dot:
|
|
|
- DEBUG_PRINT1("EXECUTING after_dot.\n");
|
|
|
- if (PTR_CHAR_POS((unsigned char *) d) <= point)
|
|
|
- goto fail;
|
|
|
+ case '.':
|
|
|
+ laststart = b;
|
|
|
+ BUF_PUSH(anychar);
|
|
|
break;
|
|
|
|
|
|
- case syntaxspec:
|
|
|
- DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
|
|
|
- mcnt = *p++;
|
|
|
- goto matchsyntax;
|
|
|
|
|
|
- case wordchar:
|
|
|
- DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
|
|
|
- mcnt = (int) Sword;
|
|
|
- matchsyntax:
|
|
|
- PREFETCH();
|
|
|
-
|
|
|
- d++;
|
|
|
- if (SYNTAX(d[-1]) != (enum syntaxcode) mcnt)
|
|
|
- goto fail;
|
|
|
- SET_REGS_MATCHED();
|
|
|
- break;
|
|
|
+ case '[':
|
|
|
+ {
|
|
|
+ boolean had_char_class = false;
|
|
|
|
|
|
- case notsyntaxspec:
|
|
|
- DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
|
|
|
- mcnt = *p++;
|
|
|
- goto matchnotsyntax;
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
- case notwordchar:
|
|
|
- DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
|
|
|
- mcnt = (int) Sword;
|
|
|
- matchnotsyntax:
|
|
|
- PREFETCH();
|
|
|
-
|
|
|
- d++;
|
|
|
- if (SYNTAX(d[-1]) == (enum syntaxcode) mcnt)
|
|
|
- goto fail;
|
|
|
- SET_REGS_MATCHED();
|
|
|
- break;
|
|
|
+
|
|
|
+ opcode, the length count, and the bitset; 34 bytes in all. */
|
|
|
+ GET_BUFFER_SPACE(34);
|
|
|
|
|
|
-#else
|
|
|
- case wordchar:
|
|
|
- DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
|
|
|
- PREFETCH();
|
|
|
- if (!WORDCHAR_P(d))
|
|
|
- goto fail;
|
|
|
- SET_REGS_MATCHED();
|
|
|
- d++;
|
|
|
- break;
|
|
|
+ laststart = b;
|
|
|
|
|
|
- case notwordchar:
|
|
|
- DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
|
|
|
- PREFETCH();
|
|
|
- if (WORDCHAR_P(d))
|
|
|
- goto fail;
|
|
|
- SET_REGS_MATCHED();
|
|
|
- d++;
|
|
|
- break;
|
|
|
-#endif
|
|
|
+
|
|
|
+ statement, so we only need one BUF_PUSH. */
|
|
|
+ BUF_PUSH(*p == '^' ? charset_not : charset);
|
|
|
+ if (*p == '^')
|
|
|
+ p++;
|
|
|
|
|
|
- default:
|
|
|
- abort();
|
|
|
- }
|
|
|
- continue;
|
|
|
+
|
|
|
+ p1 = p;
|
|
|
|
|
|
+
|
|
|
+ BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
|
|
|
|
|
|
-
|
|
|
- fail:
|
|
|
- if (!FAIL_STACK_EMPTY()) {
|
|
|
- DEBUG_PRINT1("\nFAIL:\n");
|
|
|
- POP_FAILURE_POINT(d, p,
|
|
|
- lowest_active_reg, highest_active_reg,
|
|
|
- regstart, regend, reg_info);
|
|
|
+
|
|
|
+ bzero(b, (1 << BYTEWIDTH) / BYTEWIDTH);
|
|
|
|
|
|
-
|
|
|
- if (!p)
|
|
|
- goto fail;
|
|
|
+
|
|
|
+ if ((re_opcode_t) b[-2] == charset_not
|
|
|
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) SET_LIST_BIT('\n');
|
|
|
|
|
|
-
|
|
|
- assert(p <= pend);
|
|
|
- if (p < pend) {
|
|
|
- boolean is_a_jump_n = false;
|
|
|
+
|
|
|
+ for (;;) {
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
-
|
|
|
- loop, need to pop this failure point and use the next one. */
|
|
|
- switch ((re_opcode_t) * p) {
|
|
|
- case jump_n:
|
|
|
- is_a_jump_n = true;
|
|
|
- case maybe_pop_jump:
|
|
|
- case pop_failure_jump:
|
|
|
- case jump:
|
|
|
- p1 = p + 1;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
- p1 += mcnt;
|
|
|
+ PATFETCH(c);
|
|
|
|
|
|
- if ((is_a_jump_n && (re_opcode_t) * p1 == succeed_n)
|
|
|
- || (!is_a_jump_n
|
|
|
- && (re_opcode_t) * p1 == on_failure_jump))
|
|
|
- goto fail;
|
|
|
- break;
|
|
|
- default:
|
|
|
- ;
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') {
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EESCAPE);
|
|
|
|
|
|
- if (d >= string1 && d <= end1)
|
|
|
- dend = end_match_1;
|
|
|
- } else
|
|
|
- break;
|
|
|
- }
|
|
|
+ PATFETCH(c1);
|
|
|
+ SET_LIST_BIT(c1);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
- if (best_regs_set)
|
|
|
- goto restore_best_regs;
|
|
|
+
|
|
|
+ not (i.e., when the bracket expression is `[]' so
|
|
|
+ far), the ']' character bit gets set way below. */
|
|
|
+ if (c == ']' && p != p1 + 1)
|
|
|
+ break;
|
|
|
|
|
|
- FREE_VARIABLES();
|
|
|
+
|
|
|
+ was a character class. */
|
|
|
+ if (had_char_class && c == '-' && *p != ']')
|
|
|
+ FREE_STACK_RETURN(REG_ERANGE);
|
|
|
|
|
|
- return -1;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+ was a character: if this is a hyphen not at the
|
|
|
+ beginning or the end of a list, then it's the range
|
|
|
+ operator. */
|
|
|
+ if (c == '-' && !(p - 2 >= pattern && p[-2] == '[')
|
|
|
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
|
|
|
+ && *p != ']') {
|
|
|
+ reg_errcode_t ret
|
|
|
+ = compile_range(&p, pend, translate, syntax, b);
|
|
|
|
|
|
+ if (ret != REG_NOERROR)
|
|
|
+ FREE_STACK_RETURN(ret);
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
+ else if (p[0] == '-' && p[1] != ']') {
|
|
|
+ reg_errcode_t ret;
|
|
|
|
|
|
- Return true if the pattern up to the corresponding stop_memory can
|
|
|
- match the empty string, and false otherwise.
|
|
|
+
|
|
|
+ PATFETCH(c1);
|
|
|
|
|
|
- If we find the matching stop_memory, sets P to point to one past its number.
|
|
|
- Otherwise, sets P to an undefined byte less than or equal to END.
|
|
|
+ ret = compile_range(&p, pend, translate, syntax, b);
|
|
|
+ if (ret != REG_NOERROR)
|
|
|
+ FREE_STACK_RETURN(ret);
|
|
|
+ }
|
|
|
|
|
|
- We don't handle duplicates properly (yet). */
|
|
|
+
|
|
|
+ class. */
|
|
|
|
|
|
-static boolean group_match_null_string_p(p, end, reg_info)
|
|
|
-unsigned char **p, *end;
|
|
|
-register_info_type *reg_info;
|
|
|
-{
|
|
|
- int mcnt;
|
|
|
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') {
|
|
|
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
|
|
|
|
|
|
-
|
|
|
- unsigned char *p1 = *p + 2;
|
|
|
+ PATFETCH(c);
|
|
|
+ c1 = 0;
|
|
|
|
|
|
- while (p1 < end) {
|
|
|
-
|
|
|
- false, as appropriate, when we get to one that can't, or to the
|
|
|
- matching stop_memory. */
|
|
|
+
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
- switch ((re_opcode_t) * p1) {
|
|
|
-
|
|
|
- case on_failure_jump:
|
|
|
- p1++;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ for (;;) {
|
|
|
+ PATFETCH(c);
|
|
|
+ if ((c == ':' && *p == ']') || p == pend)
|
|
|
+ break;
|
|
|
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
|
|
|
+ str[c1++] = c;
|
|
|
+ else
|
|
|
+
|
|
|
+ str[0] = '\0';
|
|
|
+ }
|
|
|
+ str[c1] = '\0';
|
|
|
|
|
|
-
|
|
|
- pattern. */
|
|
|
+
|
|
|
+ undo the ending character, the letters, and leave
|
|
|
+ the leading `:' and `[' (but set bits for them). */
|
|
|
+ if (c == ':' && *p == ']') {
|
|
|
+#if defined _LIBC || WIDE_CHAR_SUPPORT
|
|
|
+ boolean is_lower = STREQ(str, "lower");
|
|
|
+ boolean is_upper = STREQ(str, "upper");
|
|
|
+ wctype_t wt;
|
|
|
+ int ch;
|
|
|
|
|
|
- if (mcnt >= 0) {
|
|
|
-
|
|
|
- seeing if any of the alternatives cannot match nothing.
|
|
|
- The last alternative starts with only a jump,
|
|
|
- whereas the rest start with on_failure_jump and end
|
|
|
- with a jump, e.g., here is the pattern for `a|b|c':
|
|
|
+ wt = IS_CHAR_CLASS(str);
|
|
|
+ if (wt == 0)
|
|
|
+ FREE_STACK_RETURN(REG_ECTYPE);
|
|
|
|
|
|
- /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
|
|
|
- /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
|
|
|
- /exactn/1/c
|
|
|
+
|
|
|
+ class. */
|
|
|
+ PATFETCH(c);
|
|
|
|
|
|
- So, we have to first go through the first (n-1)
|
|
|
- alternatives and then deal with the last one separately. */
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) {
|
|
|
+# ifdef _LIBC
|
|
|
+ if (__iswctype(__btowc(ch), wt))
|
|
|
+ SET_LIST_BIT(ch);
|
|
|
+# else
|
|
|
+ if (iswctype(btowc(ch), wt))
|
|
|
+ SET_LIST_BIT(ch);
|
|
|
+# endif
|
|
|
|
|
|
-
|
|
|
- with an on_failure_jump (see above) that jumps to right
|
|
|
- past a jump_past_alt. */
|
|
|
+ if (translate && (is_upper || is_lower)
|
|
|
+ && (ISUPPER(ch) || ISLOWER(ch)))
|
|
|
+ SET_LIST_BIT(ch);
|
|
|
+ }
|
|
|
|
|
|
- while ((re_opcode_t) p1[mcnt - 3] == jump_past_alt) {
|
|
|
-
|
|
|
- is, including the ending `jump_past_alt' and
|
|
|
- its number. */
|
|
|
+ had_char_class = true;
|
|
|
+#else
|
|
|
+ int ch;
|
|
|
+ boolean is_alnum = STREQ(str, "alnum");
|
|
|
+ boolean is_alpha = STREQ(str, "alpha");
|
|
|
+ boolean is_blank = STREQ(str, "blank");
|
|
|
+ boolean is_cntrl = STREQ(str, "cntrl");
|
|
|
+ boolean is_digit = STREQ(str, "digit");
|
|
|
+ boolean is_graph = STREQ(str, "graph");
|
|
|
+ boolean is_lower = STREQ(str, "lower");
|
|
|
+ boolean is_print = STREQ(str, "print");
|
|
|
+ boolean is_punct = STREQ(str, "punct");
|
|
|
+ boolean is_space = STREQ(str, "space");
|
|
|
+ boolean is_upper = STREQ(str, "upper");
|
|
|
+ boolean is_xdigit = STREQ(str, "xdigit");
|
|
|
|
|
|
- if (!alt_match_null_string_p(p1, p1 + mcnt - 3,
|
|
|
- reg_info)) return false;
|
|
|
+ if (!IS_CHAR_CLASS(str))
|
|
|
+ FREE_STACK_RETURN(REG_ECTYPE);
|
|
|
|
|
|
-
|
|
|
- jump_past_alt. */
|
|
|
- p1 += mcnt;
|
|
|
+
|
|
|
+ class. */
|
|
|
+ PATFETCH(c);
|
|
|
|
|
|
-
|
|
|
- that doesn't begin with an on_failure_jump. */
|
|
|
- if ((re_opcode_t) * p1 != on_failure_jump)
|
|
|
- break;
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EBRACK);
|
|
|
|
|
|
-
|
|
|
- alternative that starts with an on_failure_jump. */
|
|
|
- p1++;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
- if ((re_opcode_t) p1[mcnt - 3] != jump_past_alt) {
|
|
|
-
|
|
|
- p1 -= 3;
|
|
|
- break;
|
|
|
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
|
|
|
+
|
|
|
+ avoid an arbitrary limit in some compiler. */
|
|
|
+ if ((is_alnum && ISALNUM(ch))
|
|
|
+ || (is_alpha && ISALPHA(ch))
|
|
|
+ || (is_blank && ISBLANK(ch))
|
|
|
+ || (is_cntrl && ISCNTRL(ch)))
|
|
|
+ SET_LIST_BIT(ch);
|
|
|
+ if ((is_digit && ISDIGIT(ch))
|
|
|
+ || (is_graph && ISGRAPH(ch))
|
|
|
+ || (is_lower && ISLOWER(ch))
|
|
|
+ || (is_print && ISPRINT(ch)))
|
|
|
+ SET_LIST_BIT(ch);
|
|
|
+ if ((is_punct && ISPUNCT(ch))
|
|
|
+ || (is_space && ISSPACE(ch))
|
|
|
+ || (is_upper && ISUPPER(ch))
|
|
|
+ || (is_xdigit && ISXDIGIT(ch)))
|
|
|
+ SET_LIST_BIT(ch);
|
|
|
+ if (translate && (is_upper || is_lower)
|
|
|
+ && (ISUPPER(ch) || ISLOWER(ch)))
|
|
|
+ SET_LIST_BIT(ch);
|
|
|
+ }
|
|
|
+ had_char_class = true;
|
|
|
+#endif
|
|
|
+ } else {
|
|
|
+ c1++;
|
|
|
+ while (c1--)
|
|
|
+ PATUNFETCH;
|
|
|
+ SET_LIST_BIT('[');
|
|
|
+ SET_LIST_BIT(':');
|
|
|
+ had_char_class = false;
|
|
|
}
|
|
|
+ } else {
|
|
|
+ had_char_class = false;
|
|
|
+ SET_LIST_BIT(c);
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- of the `jump_past_alt' just before it. `mcnt' contains
|
|
|
- the length of the alternative. */
|
|
|
- EXTRACT_NUMBER(mcnt, p1 - 2);
|
|
|
+
|
|
|
+ end of the map. Decrease the map-length byte too. */
|
|
|
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
|
|
|
+ b[-1]--;
|
|
|
+ b += b[-1];
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
- if (!alt_match_null_string_p(p1, p1 + mcnt, reg_info))
|
|
|
- return false;
|
|
|
|
|
|
- p1 += mcnt;
|
|
|
- }
|
|
|
- break;
|
|
|
+ case '(':
|
|
|
+ if (syntax & RE_NO_BK_PARENS)
|
|
|
+ goto handle_open;
|
|
|
+ else
|
|
|
+ goto normal_char;
|
|
|
|
|
|
|
|
|
- case stop_memory:
|
|
|
- assert(p1[1] == **p);
|
|
|
- *p = p1 + 2;
|
|
|
- return true;
|
|
|
+ case ')':
|
|
|
+ if (syntax & RE_NO_BK_PARENS)
|
|
|
+ goto handle_close;
|
|
|
+ else
|
|
|
+ goto normal_char;
|
|
|
|
|
|
|
|
|
- default:
|
|
|
- if (!common_op_match_null_string_p(&p1, end, reg_info))
|
|
|
- return false;
|
|
|
- }
|
|
|
- }
|
|
|
+ case '\n':
|
|
|
+ if (syntax & RE_NEWLINE_ALT)
|
|
|
+ goto handle_alt;
|
|
|
+ else
|
|
|
+ goto normal_char;
|
|
|
|
|
|
- return false;
|
|
|
-}
|
|
|
|
|
|
+ case '|':
|
|
|
+ if (syntax & RE_NO_BK_VBAR)
|
|
|
+ goto handle_alt;
|
|
|
+ else
|
|
|
+ goto normal_char;
|
|
|
|
|
|
-
|
|
|
- It expects P to be the first byte of a single alternative and END one
|
|
|
- byte past the last. The alternative can contain groups. */
|
|
|
|
|
|
-static boolean alt_match_null_string_p(p, end, reg_info)
|
|
|
-unsigned char *p, *end;
|
|
|
-register_info_type *reg_info;
|
|
|
-{
|
|
|
- int mcnt;
|
|
|
- unsigned char *p1 = p;
|
|
|
+ case '{':
|
|
|
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
|
|
|
+ goto handle_interval;
|
|
|
+ else
|
|
|
+ goto normal_char;
|
|
|
|
|
|
- while (p1 < end) {
|
|
|
-
|
|
|
- to one that can't. */
|
|
|
|
|
|
- switch ((re_opcode_t) * p1) {
|
|
|
-
|
|
|
- case on_failure_jump:
|
|
|
- p1++;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
- p1 += mcnt;
|
|
|
- break;
|
|
|
+ case '\\':
|
|
|
+ if (p == pend)
|
|
|
+ FREE_STACK_RETURN(REG_EESCAPE);
|
|
|
|
|
|
- default:
|
|
|
- if (!common_op_match_null_string_p(&p1, end, reg_info))
|
|
|
- return false;
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+ distinguish, e.g., \B from \b, even if we normally would
|
|
|
+ translate, e.g., B to b. */
|
|
|
+ PATFETCH_RAW(c);
|
|
|
|
|
|
- return true;
|
|
|
-}
|
|
|
+ switch (c) {
|
|
|
+ case '(':
|
|
|
+ if (syntax & RE_NO_BK_PARENS)
|
|
|
+ goto normal_backslash;
|
|
|
|
|
|
+ handle_open:
|
|
|
+ bufp->re_nsub++;
|
|
|
+ regnum++;
|
|
|
|
|
|
-
|
|
|
- alt_match_null_string_p.
|
|
|
+ if (COMPILE_STACK_FULL) {
|
|
|
+ RETALLOC(compile_stack.stack, compile_stack.size << 1,
|
|
|
+ compile_stack_elt_t);
|
|
|
+ if (compile_stack.stack == NULL)
|
|
|
+ return REG_ESPACE;
|
|
|
|
|
|
- Sets P to one after the op and its arguments, if any. */
|
|
|
+ compile_stack.size <<= 1;
|
|
|
+ }
|
|
|
|
|
|
-static boolean common_op_match_null_string_p(p, end, reg_info)
|
|
|
-unsigned char **p, *end;
|
|
|
-register_info_type *reg_info;
|
|
|
-{
|
|
|
- int mcnt;
|
|
|
- boolean ret;
|
|
|
- int reg_no;
|
|
|
- unsigned char *p1 = *p;
|
|
|
+
|
|
|
+ group. They are all relative offsets, so that if the
|
|
|
+ whole pattern moves because of realloc, they will still
|
|
|
+ be valid. */
|
|
|
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
|
|
|
+ COMPILE_STACK_TOP.fixup_alt_jump
|
|
|
+ =
|
|
|
+ fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
|
|
|
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
|
|
|
+ COMPILE_STACK_TOP.regnum = regnum;
|
|
|
|
|
|
- switch ((re_opcode_t) * p1++) {
|
|
|
- case no_op:
|
|
|
- case begline:
|
|
|
- case endline:
|
|
|
- case begbuf:
|
|
|
- case endbuf:
|
|
|
- case wordbeg:
|
|
|
- case wordend:
|
|
|
- case wordbound:
|
|
|
- case notwordbound:
|
|
|
-#ifdef emacs
|
|
|
- case before_dot:
|
|
|
- case at_dot:
|
|
|
- case after_dot:
|
|
|
-#endif
|
|
|
- break;
|
|
|
+
|
|
|
+ groups inner to this one. But do not push a
|
|
|
+ start_memory for groups beyond the last one we can
|
|
|
+ represent in the compiled pattern. */
|
|
|
+ if (regnum <= MAX_REGNUM) {
|
|
|
+ COMPILE_STACK_TOP.inner_group_offset =
|
|
|
+ b - bufp->buffer + 2;
|
|
|
+ BUF_PUSH_3(start_memory, regnum, 0);
|
|
|
+ }
|
|
|
|
|
|
- case start_memory:
|
|
|
- reg_no = *p1;
|
|
|
- assert(reg_no > 0 && reg_no <= MAX_REGNUM);
|
|
|
- ret = group_match_null_string_p(&p1, end, reg_info);
|
|
|
+ compile_stack.avail++;
|
|
|
|
|
|
-
|
|
|
- contains a group and a back reference to it. */
|
|
|
+ fixup_alt_jump = 0;
|
|
|
+ laststart = 0;
|
|
|
+ begalt = b;
|
|
|
+
|
|
|
+ won't actually generate any code, so we'll have to
|
|
|
+ clear pending_exact explicitly. */
|
|
|
+ pending_exact = 0;
|
|
|
+ break;
|
|
|
|
|
|
- if (REG_MATCH_NULL_STRING_P(reg_info[reg_no]) ==
|
|
|
- MATCH_NULL_UNSET_VALUE)
|
|
|
- REG_MATCH_NULL_STRING_P(reg_info[reg_no]) = ret;
|
|
|
|
|
|
- if (!ret)
|
|
|
- return false;
|
|
|
- break;
|
|
|
+ case ')':
|
|
|
+ if (syntax & RE_NO_BK_PARENS)
|
|
|
+ goto normal_backslash;
|
|
|
|
|
|
-
|
|
|
- case jump:
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
- if (mcnt >= 0)
|
|
|
- p1 += mcnt;
|
|
|
- else
|
|
|
- return false;
|
|
|
- break;
|
|
|
+ if (COMPILE_STACK_EMPTY) {
|
|
|
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
|
|
|
+ goto normal_backslash;
|
|
|
+ else
|
|
|
+ FREE_STACK_RETURN(REG_ERPAREN);
|
|
|
+ }
|
|
|
|
|
|
- case succeed_n:
|
|
|
-
|
|
|
- p1 += 2;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
+ handle_close:
|
|
|
+ if (fixup_alt_jump) {
|
|
|
+ alternative for a possible future
|
|
|
+ `pop_failure_jump' to pop. See comments at
|
|
|
+ `push_dummy_failure' in `re_match_2'. */
|
|
|
+ BUF_PUSH(push_dummy_failure);
|
|
|
|
|
|
- if (mcnt == 0) {
|
|
|
- p1 -= 4;
|
|
|
- EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
- p1 += mcnt;
|
|
|
- } else
|
|
|
- return false;
|
|
|
- break;
|
|
|
+
|
|
|
+ to `fixup_alt_jump', in the `handle_alt' case below. */
|
|
|
+ STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1);
|
|
|
+ }
|
|
|
|
|
|
- case duplicate:
|
|
|
- if (!REG_MATCH_NULL_STRING_P(reg_info[*p1]))
|
|
|
- return false;
|
|
|
- break;
|
|
|
+
|
|
|
+ if (COMPILE_STACK_EMPTY) {
|
|
|
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
|
|
|
+ goto normal_char;
|
|
|
+ else
|
|
|
+ FREE_STACK_RETURN(REG_ERPAREN);
|
|
|
+ }
|
|
|
|
|
|
- case set_number_at:
|
|
|
- p1 += 4;
|
|
|
+
|
|
|
+ ``can't happen''. */
|
|
|
+ assert(compile_stack.avail != 0);
|
|
|
+ {
|
|
|
+
|
|
|
+ later groups should continue to be numbered higher,
|
|
|
+ as in `(ab)c(de)' -- the second group is #2. */
|
|
|
+ regnum_t this_group_regnum;
|
|
|
|
|
|
- default:
|
|
|
-
|
|
|
- return false;
|
|
|
- }
|
|
|
+ compile_stack.avail--;
|
|
|
+ begalt =
|
|
|
+ bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
|
|
|
+ fixup_alt_jump =
|
|
|
+ COMPILE_STACK_TOP.fixup_alt_jump ? bufp->buffer +
|
|
|
+ COMPILE_STACK_TOP.fixup_alt_jump - 1 : 0;
|
|
|
+ laststart =
|
|
|
+ bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
|
|
|
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
|
|
|
+
|
|
|
+ won't actually generate any code, so we'll have to
|
|
|
+ clear pending_exact explicitly. */
|
|
|
+ pending_exact = 0;
|
|
|
|
|
|
- *p = p1;
|
|
|
- return true;
|
|
|
-}
|
|
|
+
|
|
|
+ groups were inside this one. */
|
|
|
+ if (this_group_regnum <= MAX_REGNUM) {
|
|
|
+ unsigned char *inner_group_loc
|
|
|
|
|
|
+ =
|
|
|
+ bufp->buffer +
|
|
|
+ COMPILE_STACK_TOP.inner_group_offset;
|
|
|
|
|
|
-
|
|
|
- bytes; nonzero otherwise. */
|
|
|
+ *inner_group_loc = regnum - this_group_regnum;
|
|
|
+ BUF_PUSH_3(stop_memory, this_group_regnum,
|
|
|
+ regnum - this_group_regnum);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
-static int bcmp_translate(s1, s2, len, translate)
|
|
|
-const char *s1, *s2;
|
|
|
-register int len;
|
|
|
-RE_TRANSLATE_TYPE translate;
|
|
|
-{
|
|
|
- register const unsigned char *p1 = (const unsigned char *) s1;
|
|
|
- register const unsigned char *p2 = (const unsigned char *) s2;
|
|
|
|
|
|
- while (len) {
|
|
|
- if (translate[*p1++] != translate[*p2++])
|
|
|
- return 1;
|
|
|
- len--;
|
|
|
- }
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
+ case '|':
|
|
|
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
|
|
|
+ goto normal_backslash;
|
|
|
+ handle_alt:
|
|
|
+ if (syntax & RE_LIMITED_OPS)
|
|
|
+ goto normal_char;
|
|
|
|
|
|
-
|
|
|
- compiles PATTERN (of length SIZE) and puts the result in BUFP.
|
|
|
- Returns 0 if the pattern was valid, otherwise an error string.
|
|
|
+
|
|
|
+ jumps to this alternative if the former fails. */
|
|
|
+ GET_BUFFER_SPACE(3);
|
|
|
+ INSERT_JUMP(on_failure_jump, begalt, b + 6);
|
|
|
+ pending_exact = 0;
|
|
|
+ b += 3;
|
|
|
|
|
|
- Assumes the `allocated' (and perhaps `buffer') and `translate' fields
|
|
|
- are set in BUFP on entry.
|
|
|
+
|
|
|
+ which gets executed if it gets matched. Adjust that
|
|
|
+ jump so it will jump to this alternative's analogous
|
|
|
+ jump (put in below, which in turn will jump to the next
|
|
|
+ (if any) alternative's such jump, etc.). The last such
|
|
|
+ jump jumps to the correct final destination. A picture:
|
|
|
+ _____ _____
|
|
|
+ | | | |
|
|
|
+ | v | v
|
|
|
+ a | b | c
|
|
|
|
|
|
- We call regex_compile to do the actual compilation. */
|
|
|
+ If we are at `b', then fixup_alt_jump right now points to a
|
|
|
+ three-byte space after `a'. We'll put in the jump, set
|
|
|
+ fixup_alt_jump to right after `b', and leave behind three
|
|
|
+ bytes which we'll fill in when we get to after `c'. */
|
|
|
|
|
|
-const char *re_compile_pattern(pattern, length, bufp)
|
|
|
-const char *pattern;
|
|
|
-size_t length;
|
|
|
-struct re_pattern_buffer *bufp;
|
|
|
-{
|
|
|
- reg_errcode_t ret;
|
|
|
+ if (fixup_alt_jump)
|
|
|
+ STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
|
|
|
|
|
|
-
|
|
|
- (and at least one extra will be -1). */
|
|
|
- bufp->regs_allocated = REGS_UNALLOCATED;
|
|
|
+
|
|
|
+ to be filled in later either by next alternative or
|
|
|
+ when know we're at the end of a series of alternatives. */
|
|
|
+ fixup_alt_jump = b;
|
|
|
+ GET_BUFFER_SPACE(3);
|
|
|
+ b += 3;
|
|
|
|
|
|
-
|
|
|
- by passing null for the REGS argument to re_match, etc., not by
|
|
|
- setting no_sub. */
|
|
|
- bufp->no_sub = 0;
|
|
|
+ laststart = 0;
|
|
|
+ begalt = b;
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- bufp->newline_anchor = 1;
|
|
|
|
|
|
- ret = regex_compile(pattern, length, re_syntax_options, bufp);
|
|
|
+ case '{':
|
|
|
+
|
|
|
+ if (!(syntax & RE_INTERVALS)
|
|
|
+
|
|
|
+ operator. */
|
|
|
+ || ((syntax & RE_INTERVALS)
|
|
|
+ && (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern
|
|
|
+ && p == pend))
|
|
|
+ goto normal_backslash;
|
|
|
|
|
|
- if (!ret)
|
|
|
- return NULL;
|
|
|
- return gettext(re_error_msgid + re_error_msgid_idx[(int) ret]);
|
|
|
-}
|
|
|
+ handle_interval:
|
|
|
+ {
|
|
|
+
|
|
|
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__re_compile_pattern, re_compile_pattern)
|
|
|
-#endif
|
|
|
-
|
|
|
- them unless specifically requested. */
|
|
|
-#if defined _REGEX_RE_COMP || defined _LIBC
|
|
|
-
|
|
|
-static struct re_pattern_buffer re_comp_buf;
|
|
|
+
|
|
|
+ int lower_bound = -1, upper_bound = -1;
|
|
|
|
|
|
-char *
|
|
|
-#ifdef _LIBC
|
|
|
-
|
|
|
- these names if they don't use our functions, and still use
|
|
|
- regcomp/regexec below without link errors. */ weak_function
|
|
|
-#endif
|
|
|
-re_comp(s)
|
|
|
-const char *s;
|
|
|
-{
|
|
|
- reg_errcode_t ret;
|
|
|
+ beg_interval = p - 1;
|
|
|
|
|
|
- if (!s) {
|
|
|
- if (!re_comp_buf.buffer)
|
|
|
- return gettext("No previous regular expression");
|
|
|
- return 0;
|
|
|
- }
|
|
|
+ if (p == pend) {
|
|
|
+ if (!(syntax & RE_INTERVALS)
|
|
|
+ && (syntax & RE_NO_BK_BRACES)) goto
|
|
|
+ unfetch_interval;
|
|
|
+ else
|
|
|
+ FREE_STACK_RETURN(REG_EBRACE);
|
|
|
+ }
|
|
|
|
|
|
- if (!re_comp_buf.buffer) {
|
|
|
- re_comp_buf.buffer = (unsigned char *) malloc(200);
|
|
|
- if (re_comp_buf.buffer == NULL)
|
|
|
- return (char *) gettext(re_error_msgid
|
|
|
- +
|
|
|
- re_error_msgid_idx[(int) REG_ESPACE]);
|
|
|
- re_comp_buf.allocated = 200;
|
|
|
+ GET_UNSIGNED_NUMBER(lower_bound);
|
|
|
|
|
|
- re_comp_buf.fastmap = (char *) malloc(1 << BYTEWIDTH);
|
|
|
- if (re_comp_buf.fastmap == NULL)
|
|
|
- return (char *) gettext(re_error_msgid
|
|
|
- +
|
|
|
- re_error_msgid_idx[(int) REG_ESPACE]);
|
|
|
- }
|
|
|
+ if (c == ',') {
|
|
|
+ GET_UNSIGNED_NUMBER(upper_bound);
|
|
|
+ if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
|
|
|
+ || ((syntax & RE_NO_BK_BRACES) && c != '}'))
|
|
|
+ FREE_STACK_RETURN(REG_BADBR);
|
|
|
|
|
|
-
|
|
|
- don't need to initialize the pattern buffer fields which affect it. */
|
|
|
+ if (upper_bound < 0)
|
|
|
+ upper_bound = RE_DUP_MAX;
|
|
|
+ } else
|
|
|
+
|
|
|
+ upper_bound = lower_bound;
|
|
|
|
|
|
-
|
|
|
- re_comp_buf.newline_anchor = 1;
|
|
|
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
|
|
|
+ || lower_bound > upper_bound) {
|
|
|
+ if (!(syntax & RE_INTERVALS)
|
|
|
+ && (syntax & RE_NO_BK_BRACES)) goto
|
|
|
+ unfetch_interval;
|
|
|
+ else
|
|
|
+ FREE_STACK_RETURN(REG_BADBR);
|
|
|
+ }
|
|
|
|
|
|
- ret = regex_compile(s, strlen(s), re_syntax_options, &re_comp_buf);
|
|
|
+ if (!(syntax & RE_NO_BK_BRACES)) {
|
|
|
+ if (c != '\\')
|
|
|
+ FREE_STACK_RETURN(REG_EBRACE);
|
|
|
|
|
|
- if (!ret)
|
|
|
- return NULL;
|
|
|
+ PATFETCH(c);
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- return (char *) gettext(re_error_msgid +
|
|
|
- re_error_msgid_idx[(int) ret]);
|
|
|
-}
|
|
|
+ if (c != '}') {
|
|
|
+ if (!(syntax & RE_INTERVALS)
|
|
|
+ && (syntax & RE_NO_BK_BRACES)) goto
|
|
|
+ unfetch_interval;
|
|
|
+ else
|
|
|
+ FREE_STACK_RETURN(REG_BADBR);
|
|
|
+ }
|
|
|
|
|
|
+
|
|
|
|
|
|
-int
|
|
|
-#ifdef _LIBC
|
|
|
- weak_function
|
|
|
-#endif
|
|
|
-re_exec(s)
|
|
|
-const char *s;
|
|
|
-{
|
|
|
- const int len = strlen(s);
|
|
|
+
|
|
|
+ if (!laststart) {
|
|
|
+ if (syntax & RE_CONTEXT_INVALID_OPS)
|
|
|
+ FREE_STACK_RETURN(REG_BADRPT);
|
|
|
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
|
|
|
+ laststart = b;
|
|
|
+ else
|
|
|
+ goto unfetch_interval;
|
|
|
+ }
|
|
|
|
|
|
- return
|
|
|
- 0 <= re_search(&re_comp_buf, s, len, 0, len,
|
|
|
- (struct re_registers *) 0);
|
|
|
-}
|
|
|
+
|
|
|
+ all; jump from `laststart' to `b + 3', which will be
|
|
|
+ the end of the buffer after we insert the jump. */
|
|
|
+ if (upper_bound == 0) {
|
|
|
+ GET_BUFFER_SPACE(3);
|
|
|
+ INSERT_JUMP(jump, laststart, b + 3);
|
|
|
+ b += 3;
|
|
|
+ }
|
|
|
|
|
|
-#endif
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+ we're all done, the pattern will look like:
|
|
|
+ set_number_at <jump count> <upper bound>
|
|
|
+ set_number_at <succeed_n count> <lower bound>
|
|
|
+ succeed_n <after jump addr> <succeed_n count>
|
|
|
+ <body of loop>
|
|
|
+ jump_n <succeed_n addr> <jump count>
|
|
|
+ (The upper bound and `jump_n' are omitted if
|
|
|
+ `upper_bound' is 1, though.) */
|
|
|
+ else {
|
|
|
+ more at the end of the loop. */
|
|
|
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
|
|
|
|
|
|
-#ifndef emacs
|
|
|
+ GET_BUFFER_SPACE(nbytes);
|
|
|
|
|
|
-
|
|
|
+
|
|
|
+ though it will be set during matching by its
|
|
|
+ attendant `set_number_at' (inserted next),
|
|
|
+ because `re_compile_fastmap' needs to know.
|
|
|
+ Jump to the `jump_n' we might insert below. */
|
|
|
+ INSERT_JUMP2(succeed_n, laststart,
|
|
|
+ b + 5 + (upper_bound > 1) * 5,
|
|
|
+ lower_bound);
|
|
|
+ b += 5;
|
|
|
|
|
|
- PREG is a regex_t *. We do not expect any fields to be initialized,
|
|
|
- since POSIX says we shouldn't. Thus, we set
|
|
|
+
|
|
|
+ before the `succeed_n'. The `5' is the last two
|
|
|
+ bytes of this `set_number_at', plus 3 bytes of
|
|
|
+ the following `succeed_n'. */
|
|
|
+ insert_op2(set_number_at, laststart, 5,
|
|
|
+ lower_bound, b);
|
|
|
+ b += 5;
|
|
|
|
|
|
- `buffer' to the compiled pattern;
|
|
|
- `used' to the length of the compiled pattern;
|
|
|
- `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
|
|
|
- REG_EXTENDED bit in CFLAGS is set; otherwise, to
|
|
|
- RE_SYNTAX_POSIX_BASIC;
|
|
|
- `newline_anchor' to REG_NEWLINE being set in CFLAGS;
|
|
|
- `fastmap' to an allocated space for the fastmap;
|
|
|
- `fastmap_accurate' to zero;
|
|
|
- `re_nsub' to the number of subexpressions in PATTERN.
|
|
|
+ if (upper_bound > 1) {
|
|
|
+ append a backward jump to the `succeed_n'
|
|
|
+ that starts this interval.
|
|
|
|
|
|
- PATTERN is the address of the pattern string.
|
|
|
+ When we've reached this during matching,
|
|
|
+ we'll have matched the interval once, so
|
|
|
+ jump back only `upper_bound - 1' times. */
|
|
|
+ STORE_JUMP2(jump_n, b, laststart + 5,
|
|
|
+ upper_bound - 1);
|
|
|
+ b += 5;
|
|
|
|
|
|
- CFLAGS is a series of bits which affect compilation.
|
|
|
+
|
|
|
+ parameter of the `jump_n'; that is `b-2' as
|
|
|
+ an absolute address. `laststart' will be
|
|
|
+ the `set_number_at' we're about to insert;
|
|
|
+ `laststart+3' the number to set, the source
|
|
|
+ for the relative address. But we are
|
|
|
+ inserting into the middle of the pattern --
|
|
|
+ so everything is getting moved up by 5.
|
|
|
+ Conclusion: (b - 2) - (laststart + 3) + 5,
|
|
|
+ i.e., b - laststart.
|
|
|
|
|
|
- If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
|
|
|
- use POSIX basic syntax.
|
|
|
+ We insert this at the beginning of the loop
|
|
|
+ so that if we fail during matching, we'll
|
|
|
+ reinitialize the bounds. */
|
|
|
+ insert_op2(set_number_at, laststart,
|
|
|
+ b - laststart, upper_bound - 1, b);
|
|
|
+ b += 5;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ pending_exact = 0;
|
|
|
+ beg_interval = NULL;
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
- If REG_NEWLINE is set, then . and [^...] don't match newline.
|
|
|
- Also, regexec will try a match beginning after every newline.
|
|
|
+ unfetch_interval:
|
|
|
+
|
|
|
+ assert(beg_interval);
|
|
|
+ p = beg_interval;
|
|
|
+ beg_interval = NULL;
|
|
|
|
|
|
- If REG_ICASE is set, then we considers upper- and lowercase
|
|
|
- versions of letters to be equivalent when matching.
|
|
|
+
|
|
|
+ PATFETCH(c);
|
|
|
|
|
|
- If REG_NOSUB is set, then when PREG is passed to regexec, that
|
|
|
- routine will report only success or failure, and nothing about the
|
|
|
- registers.
|
|
|
+ if (!(syntax & RE_NO_BK_BRACES)) {
|
|
|
+ if (p > pattern && p[-1] == '\\')
|
|
|
+ goto normal_backslash;
|
|
|
+ }
|
|
|
+ goto normal_char;
|
|
|
|
|
|
- It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
|
|
|
- the return codes and their meanings.) */
|
|
|
+#ifdef emacs
|
|
|
+
|
|
|
+ operators. rms says this is ok. --karl */
|
|
|
+ case '=':
|
|
|
+ BUF_PUSH(at_dot);
|
|
|
+ break;
|
|
|
|
|
|
-int regcomp(preg, pattern, cflags)
|
|
|
-regex_t *preg;
|
|
|
-const char *pattern;
|
|
|
-int cflags;
|
|
|
-{
|
|
|
- reg_errcode_t ret;
|
|
|
- reg_syntax_t syntax
|
|
|
- = (cflags & REG_EXTENDED) ?
|
|
|
+ case 's':
|
|
|
+ laststart = b;
|
|
|
+ PATFETCH(c);
|
|
|
+ BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
|
|
|
+ break;
|
|
|
|
|
|
- RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
|
|
|
+ case 'S':
|
|
|
+ laststart = b;
|
|
|
+ PATFETCH(c);
|
|
|
+ BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
|
|
|
+ break;
|
|
|
+#endif
|
|
|
|
|
|
-
|
|
|
- preg->buffer = 0;
|
|
|
- preg->allocated = 0;
|
|
|
- preg->used = 0;
|
|
|
|
|
|
-
|
|
|
- preg->fastmap = (char *) malloc(1 << BYTEWIDTH);
|
|
|
+ case 'w':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ laststart = b;
|
|
|
+ BUF_PUSH(wordchar);
|
|
|
+ break;
|
|
|
|
|
|
- if (cflags & REG_ICASE) {
|
|
|
- unsigned i;
|
|
|
|
|
|
- preg->translate
|
|
|
- = (RE_TRANSLATE_TYPE) malloc(CHAR_SET_SIZE
|
|
|
- * sizeof(*(RE_TRANSLATE_TYPE) 0));
|
|
|
- if (preg->translate == NULL)
|
|
|
- return (int) REG_ESPACE;
|
|
|
+ case 'W':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ laststart = b;
|
|
|
+ BUF_PUSH(notwordchar);
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- for (i = 0; i < CHAR_SET_SIZE; i++)
|
|
|
- preg->translate[i] = ISUPPER(i) ? TOLOWER(i) : i;
|
|
|
- } else
|
|
|
- preg->translate = NULL;
|
|
|
|
|
|
-
|
|
|
- if (cflags & REG_NEWLINE) {
|
|
|
- syntax &= ~RE_DOT_NEWLINE;
|
|
|
- syntax |= RE_HAT_LISTS_NOT_NEWLINE;
|
|
|
-
|
|
|
- preg->newline_anchor = 1;
|
|
|
- } else
|
|
|
- preg->newline_anchor = 0;
|
|
|
+ case '<':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ BUF_PUSH(wordbeg);
|
|
|
+ break;
|
|
|
|
|
|
- preg->no_sub = !!(cflags & REG_NOSUB);
|
|
|
+ case '>':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ BUF_PUSH(wordend);
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- can use strlen here in compiling the pattern. */
|
|
|
- ret = regex_compile(pattern, strlen(pattern), syntax, preg);
|
|
|
+ case 'b':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ BUF_PUSH(wordbound);
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- unmatched close-group: both are REG_EPAREN. */
|
|
|
- if (ret == REG_ERPAREN)
|
|
|
- ret = REG_EPAREN;
|
|
|
+ case 'B':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ BUF_PUSH(notwordbound);
|
|
|
+ break;
|
|
|
|
|
|
- if (ret == REG_NOERROR && preg->fastmap) {
|
|
|
-
|
|
|
- buffer. */
|
|
|
- if (re_compile_fastmap(preg) == -2) {
|
|
|
-
|
|
|
- about it. */
|
|
|
- free(preg->fastmap);
|
|
|
- preg->fastmap = NULL;
|
|
|
- }
|
|
|
- }
|
|
|
+ case '`':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ BUF_PUSH(begbuf);
|
|
|
+ break;
|
|
|
|
|
|
- return (int) ret;
|
|
|
-}
|
|
|
+ case '\'':
|
|
|
+ if (syntax & RE_NO_GNU_OPS)
|
|
|
+ goto normal_char;
|
|
|
+ BUF_PUSH(endbuf);
|
|
|
+ break;
|
|
|
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__regcomp, regcomp)
|
|
|
-#endif
|
|
|
-
|
|
|
- string STRING.
|
|
|
+ case '1':
|
|
|
+ case '2':
|
|
|
+ case '3':
|
|
|
+ case '4':
|
|
|
+ case '5':
|
|
|
+ case '6':
|
|
|
+ case '7':
|
|
|
+ case '8':
|
|
|
+ case '9':
|
|
|
+ if (syntax & RE_NO_BK_REFS)
|
|
|
+ goto normal_char;
|
|
|
|
|
|
- If NMATCH is zero or REG_NOSUB was set in the cflags argument to
|
|
|
- `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
|
|
|
- least NMATCH elements, and we set them to the offsets of the
|
|
|
- corresponding matched substrings.
|
|
|
+ c1 = c - '0';
|
|
|
|
|
|
- EFLAGS specifies `execution flags' which affect matching: if
|
|
|
- REG_NOTBOL is set, then ^ does not match at the beginning of the
|
|
|
- string; if REG_NOTEOL is set, then $ does not match at the end.
|
|
|
+ if (c1 > regnum)
|
|
|
+ FREE_STACK_RETURN(REG_ESUBREG);
|
|
|
|
|
|
- We return 0 if we find a match and REG_NOMATCH if not. */
|
|
|
-int regexec(preg, string, nmatch, pmatch, eflags)
|
|
|
-const regex_t *preg;
|
|
|
-const char *string;
|
|
|
-size_t nmatch;
|
|
|
-regmatch_t pmatch[];
|
|
|
-int eflags;
|
|
|
-{
|
|
|
- int ret;
|
|
|
- struct re_registers regs;
|
|
|
- regex_t private_preg;
|
|
|
- int len = strlen(string);
|
|
|
- boolean want_reg_info = !preg->no_sub && nmatch > 0;
|
|
|
+
|
|
|
+ if (group_in_compile_stack(compile_stack, (regnum_t) c1))
|
|
|
+ goto normal_char;
|
|
|
|
|
|
- private_preg = *preg;
|
|
|
+ laststart = b;
|
|
|
+ BUF_PUSH_2(duplicate, c1);
|
|
|
+ break;
|
|
|
|
|
|
- private_preg.not_bol = !!(eflags & REG_NOTBOL);
|
|
|
- private_preg.not_eol = !!(eflags & REG_NOTEOL);
|
|
|
|
|
|
-
|
|
|
- information about, via `nmatch'. We have to pass that on to the
|
|
|
- matching routines. */
|
|
|
- private_preg.regs_allocated = REGS_FIXED;
|
|
|
+ case '+':
|
|
|
+ case '?':
|
|
|
+ if (syntax & RE_BK_PLUS_QM)
|
|
|
+ goto handle_plus;
|
|
|
+ else
|
|
|
+ goto normal_backslash;
|
|
|
|
|
|
- if (want_reg_info) {
|
|
|
- regs.num_regs = nmatch;
|
|
|
- regs.start = TALLOC(nmatch * 2, regoff_t);
|
|
|
- if (regs.start == NULL)
|
|
|
- return (int) REG_NOMATCH;
|
|
|
- regs.end = regs.start + nmatch;
|
|
|
- }
|
|
|
+ default:
|
|
|
+ normal_backslash:
|
|
|
+
|
|
|
+ not to translate; but if we don't translate it
|
|
|
+ it will never match anything. */
|
|
|
+ c = TRANSLATE(c);
|
|
|
+ goto normal_char;
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
-
|
|
|
- ret = re_search(&private_preg, string, len,
|
|
|
- 0, len,
|
|
|
- want_reg_info ? ®s : (struct re_registers *) 0);
|
|
|
|
|
|
-
|
|
|
- if (want_reg_info) {
|
|
|
- if (ret >= 0) {
|
|
|
- unsigned r;
|
|
|
+ default:
|
|
|
+
|
|
|
+ normal_char:
|
|
|
+
|
|
|
+ if (!pending_exact
|
|
|
+
|
|
|
+ || pending_exact + *pending_exact + 1 != b
|
|
|
+
|
|
|
+ || *pending_exact == (1 << BYTEWIDTH) - 1
|
|
|
+
|
|
|
+ || *p == '*' || *p == '^' || ((syntax & RE_BK_PLUS_QM)
|
|
|
+ ? *p == '\\' && (p[1] == '+'
|
|
|
+ || p[1] ==
|
|
|
+ '?') : (*p
|
|
|
+ ==
|
|
|
+ '+'
|
|
|
+ ||
|
|
|
+ *p
|
|
|
+ ==
|
|
|
+ '?'))
|
|
|
+ || ((syntax & RE_INTERVALS)
|
|
|
+ && ((syntax & RE_NO_BK_BRACES)
|
|
|
+ ? *p == '{' : (p[0] == '\\' && p[1] == '{')))) {
|
|
|
+
|
|
|
|
|
|
- for (r = 0; r < nmatch; r++) {
|
|
|
- pmatch[r].rm_so = regs.start[r];
|
|
|
- pmatch[r].rm_eo = regs.end[r];
|
|
|
+ laststart = b;
|
|
|
+
|
|
|
+ BUF_PUSH_2(exactn, 0);
|
|
|
+ pending_exact = b - 1;
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
-
|
|
|
- free(regs.start);
|
|
|
- }
|
|
|
+ BUF_PUSH(c);
|
|
|
+ (*pending_exact)++;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
- return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
|
|
|
-}
|
|
|
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__regexec, regexec)
|
|
|
-#endif
|
|
|
-
|
|
|
- from either regcomp or regexec. We don't use PREG here. */
|
|
|
- size_t regerror(errcode, preg, errbuf, errbuf_size)
|
|
|
-int errcode;
|
|
|
-const regex_t *preg;
|
|
|
-char *errbuf;
|
|
|
-size_t errbuf_size;
|
|
|
-{
|
|
|
- const char *msg;
|
|
|
- size_t msg_size;
|
|
|
+
|
|
|
|
|
|
- if (errcode < 0 || errcode >= (int) (sizeof(re_error_msgid_idx)
|
|
|
- / sizeof(re_error_msgid_idx[0])))
|
|
|
-
|
|
|
- to this routine. If we are given anything else, or if other regex
|
|
|
- code generates an invalid error code, then the program has a bug.
|
|
|
- Dump core so we can fix it. */
|
|
|
- abort();
|
|
|
+ if (fixup_alt_jump)
|
|
|
+ STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
|
|
|
|
|
|
- msg = gettext(re_error_msgid + re_error_msgid_idx[errcode]);
|
|
|
+ if (!COMPILE_STACK_EMPTY)
|
|
|
+ FREE_STACK_RETURN(REG_EPAREN);
|
|
|
|
|
|
- msg_size = strlen(msg) + 1;
|
|
|
+
|
|
|
+ the first time we reach the end of the compiled pattern. */
|
|
|
+ if (syntax & RE_NO_POSIX_BACKTRACKING)
|
|
|
+ BUF_PUSH(succeed);
|
|
|
|
|
|
- if (errbuf_size != 0) {
|
|
|
- if (msg_size > errbuf_size) {
|
|
|
-#if defined HAVE_MEMPCPY || defined _LIBC
|
|
|
- *((char *) __mempcpy(errbuf, msg, errbuf_size - 1)) = '\0';
|
|
|
-#else
|
|
|
- memcpy(errbuf, msg, errbuf_size - 1);
|
|
|
- errbuf[errbuf_size - 1] = 0;
|
|
|
-#endif
|
|
|
- } else
|
|
|
- memcpy(errbuf, msg, msg_size);
|
|
|
- }
|
|
|
+ free(compile_stack.stack);
|
|
|
|
|
|
- return msg_size;
|
|
|
-}
|
|
|
+
|
|
|
+ bufp->used = b - bufp->buffer;
|
|
|
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__regerror, regerror)
|
|
|
-#endif
|
|
|
-
|
|
|
-void regfree(preg)
|
|
|
-regex_t *preg;
|
|
|
-{
|
|
|
- if (preg->buffer != NULL)
|
|
|
- free(preg->buffer);
|
|
|
- preg->buffer = NULL;
|
|
|
+#ifdef DEBUG
|
|
|
+ if (debug) {
|
|
|
+ DEBUG_PRINT1("\nCompiled pattern: \n");
|
|
|
+ print_compiled_pattern(bufp);
|
|
|
+ }
|
|
|
+#endif
|
|
|
|
|
|
- preg->allocated = 0;
|
|
|
- preg->used = 0;
|
|
|
+#ifndef MATCH_MAY_ALLOCATE
|
|
|
+
|
|
|
+ isn't necessary unless we're trying to avoid calling alloca in
|
|
|
+ the search and match routines. */
|
|
|
+ {
|
|
|
+ int num_regs = bufp->re_nsub + 1;
|
|
|
|
|
|
- if (preg->fastmap != NULL)
|
|
|
- free(preg->fastmap);
|
|
|
- preg->fastmap = NULL;
|
|
|
- preg->fastmap_accurate = 0;
|
|
|
+
|
|
|
+ is strictly greater than re_max_failures, the largest possible stack
|
|
|
+ is 2 * re_max_failures failure points. */
|
|
|
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
|
|
|
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
|
|
|
|
|
|
- if (preg->translate != NULL)
|
|
|
- free(preg->translate);
|
|
|
- preg->translate = NULL;
|
|
|
-}
|
|
|
+# ifdef emacs
|
|
|
+ if (!fail_stack.stack)
|
|
|
+ fail_stack.stack
|
|
|
+ = (fail_stack_elt_t *) xmalloc(fail_stack.size
|
|
|
+ *
|
|
|
+ sizeof
|
|
|
+ (fail_stack_elt_t));
|
|
|
+ else
|
|
|
+ fail_stack.stack =
|
|
|
+ (fail_stack_elt_t *) xrealloc(fail_stack.stack,
|
|
|
+ (fail_stack.size *
|
|
|
+ sizeof
|
|
|
+ (fail_stack_elt_t)));
|
|
|
+# else
|
|
|
+ if (!fail_stack.stack)
|
|
|
+ fail_stack.stack
|
|
|
+ = (fail_stack_elt_t *) malloc(fail_stack.size
|
|
|
+ *
|
|
|
+ sizeof
|
|
|
+ (fail_stack_elt_t));
|
|
|
+ else
|
|
|
+ fail_stack.stack =
|
|
|
+ (fail_stack_elt_t *) realloc(fail_stack.stack,
|
|
|
+ (fail_stack.size *
|
|
|
+ sizeof
|
|
|
+ (fail_stack_elt_t)));
|
|
|
+# endif
|
|
|
+ }
|
|
|
|
|
|
-#ifdef _LIBC
|
|
|
-weak_alias(__regfree, regfree)
|
|
|
-#endif
|
|
|
-#endif
|
|
|
+ regex_grow_registers(num_regs);
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ return REG_NOERROR;
|
|
|
+}
|