12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211 |
- /*
- * Simple Regular Expression functions. Derived from Unix 7th Edition,
- * /usr/src/cmd/expr.y
- *
- * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002.
- *
- * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * Redistributions of source code and documentation must retain the
- * above copyright notice, this list of conditions and the following
- * disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed or owned by Caldera
- * International, Inc.
- * Neither the name of Caldera International, Inc. nor the names of
- * other contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
- * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
- * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
/*
 * REGEXP_H_USED decorates the SCCS id string below. GCC 3.4 and later
 * get the "used" attribute, older GCC versions get "unused", and other
 * compilers get nothing.
 */
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define REGEXP_H_USED __attribute__ ((used))
#elif defined __GNUC__
#define REGEXP_H_USED __attribute__ ((unused))
#else
#define REGEXP_H_USED
#endif
/* Version identification string; not referenced by the code in this file. */
static const char regexp_h_sccsid[] REGEXP_H_USED =
	"@(#)regexp.sl 1.56 (gritter) 5/29/05";
/*
 * Multibyte/wide character support is compiled in unless the includer
 * defines REGEXP_H_USED_FROM_VI or the C library is dietlibc.
 */
#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
#define REGEXP_H_WCHARS
#endif
/*
 * Opcodes of the compiled expression. compile() emits them and
 * regexp_h_advance() interprets them. The low bits of an opcode can
 * carry the STAR or RNGE modifier, REGEXP_H_LEAST marks an open-ended
 * \{m,\} range, and CMB marks the multibyte variant of an opcode.
 */
#define CBRA 2		/* \( followed by the group number */
#define CCHR 4		/* literal character */
#define CDOT 8		/* . */
#define CCL 12		/* [...] bracket expression */
/* CLNUM 14 used in sed */
/* CEND 16 used in sed */
#define CDOL 20		/* $ at the end of the expression */
#define CCEOF 22	/* end of the compiled expression */
#define CKET 24		/* \) followed by the group number */
#define CBACK 36	/* \1 .. \9 back reference */
#define CNCL 40		/* [^...] (multibyte form only) */
#define CBRC 44		/* \< */
#define CLET 48		/* \> */
#define CCH1 52		/* literal stored as one byte (multibyte mode) */
#define CCH2 56		/* literal stored as two bytes */
#define CCH3 60		/* literal stored as three bytes */
#define STAR 01		/* modifier: preceding item followed by * */
#define RNGE 03		/* modifier: preceding item followed by \{m,n\} */
#define REGEXP_H_LEAST 0100	/* modifier: \{m,\} has no upper bound */
#ifdef REGEXP_H_WCHARS
#define CMB 0200	/* modifier: multibyte variant of the opcode */
#else /* !REGEXP_H_WCHARS */
#define CMB 0
#endif /* !REGEXP_H_WCHARS */
#define NBRA 9		/* maximum number of \( \) groups */
/*
 * Bitmap helpers for single-byte bracket expressions. Both operate on
 * the variable named `ep' in the calling scope. The argument is fully
 * parenthesized so that any expression can be passed.
 */
#define PLACE(c) (ep[(c) >> 3] |= bittab[(c) & 07])
#define ISTHERE(c) (ep[(c) >> 3] & bittab[(c) & 07])
#ifdef REGEXP_H_WCHARS
/* Same test as ISTHERE(), but with the bitmap passed explicitly. */
#define REGEXP_H_IS_THERE(ep, c) ((ep)[(c) >> 3] & bittab[(c) & 07])
#endif
- #include <ctype.h>
- #include <string.h>
- #include <limits.h>
- #ifdef REGEXP_H_WCHARS
- #include <stdlib.h>
- #include <wchar.h>
- #include <wctype.h>
- #endif /* REGEXP_H_WCHARS */
/* True if c is alphabetic or an underscore. */
#define regexp_h_uletter(c) (isalpha(c) || (c) == '_')
#ifdef REGEXP_H_WCHARS
/* Wide character counterpart of regexp_h_uletter(). */
#define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_')
/*
 * Used to allocate memory for the multibyte star algorithm.
 * Both macros can be predefined by the includer.
 */
#ifndef regexp_h_malloc
#define regexp_h_malloc(n) malloc(n)
#endif
#ifndef regexp_h_free
#define regexp_h_free(p) free(p)
#endif
/*
 * Can be predefined to 'inline' to inline some multibyte functions;
 * may improve performance for files that contain many multibyte
 * sequences.
 */
#ifndef regexp_h_inline
#define regexp_h_inline
#endif
/*
 * Mask to determine whether the first byte of a sequence possibly
 * starts a multibyte character. Set to 0377 to force mbtowc() for
 * any byte sequence (except 0).
 */
#ifndef REGEXP_H_MASK
#define REGEXP_H_MASK 0200
#endif
#endif /* REGEXP_H_WCHARS */
/*
 * For regexpr.h.
 * regexp_h_static is placed in front of some definitions in this file.
 * REGEXP_H_STEP_INIT and REGEXP_H_ADVANCE_INIT are expanded at the top
 * of step() and advance(). All three default to nothing.
 */
#ifndef regexp_h_static
#define regexp_h_static
#endif
#ifndef REGEXP_H_STEP_INIT
#define REGEXP_H_STEP_INIT
#endif
#ifndef REGEXP_H_ADVANCE_INIT
#define REGEXP_H_ADVANCE_INIT
#endif
- char *braslist[NBRA];
- char *braelist[NBRA];
- int nbra;
- char *loc1, *loc2, *locs;
- int sed;
- int nodelim;
- regexp_h_static int circf;
- regexp_h_static int low;
- regexp_h_static int size;
- regexp_h_static unsigned char bittab[] = {
- 1,
- 2,
- 4,
- 8,
- 16,
- 32,
- 64,
- 128
- };
- static int regexp_h_advance(register const char *lp,
- register const char *ep);
- static void regexp_h_getrnge(register const char *str, int least);
- static const char *regexp_h_bol; /* beginning of input line (for \<) */
- #ifdef REGEXP_H_WCHARS
- static int regexp_h_wchars;
- static int regexp_h_mbcurmax;
- static const char *regexp_h_firstwc; /* location of first
- multibyte character
- on input line */
/*
 * Read the next character of the expression into c.
 *
 * In multibyte mode, bytes are taken from GETC() and collected in a
 * buffer until mbtowc() succeeds, the byte just read equals `eof', or
 * MB_LEN_MAX bytes have been read. If mbtowc() still reports -1 after
 * that, ERROR(67) is invoked; otherwise c receives the wide character.
 * In single-byte mode, c is simply the next GETC() result.
 *
 * Expects GETC(), ERROR() and a variable named `eof' in the calling
 * scope.
 */
#define regexp_h_getwc(c) { \
	if (regexp_h_wchars) { \
		char mbbuf[MB_LEN_MAX + 1], *mbptr; \
		wchar_t wcbuf; \
		int mb, len; \
		mbptr = mbbuf; \
		do { \
			mb = GETC(); \
			*mbptr++ = mb; \
			*mbptr = '\0'; \
		} while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
			&& mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
		if (len == -1) \
			ERROR(67); \
		c = wcbuf; \
	} else { \
		c = GETC(); \
	} \
}
/*
 * Append the multibyte encoding of wc at mb and move mb past it;
 * me is the end of the output buffer.
 *
 * WEOF is rejected with ERROR(67). When no more than regexp_h_mbcurmax
 * bytes remain before me, the character is first encoded into a
 * scratch buffer, and ERROR(50) is invoked if the encoding would not
 * leave at least one byte free. A wctomb() result of -1 invokes
 * ERROR(67); a result of 0 advances mb by one byte.
 */
#define regexp_h_store(wc, mb, me) { \
	int len; \
	if (wc == WEOF) \
		ERROR(67); \
	if ((len = me - mb) <= regexp_h_mbcurmax) { \
		char mt[MB_LEN_MAX]; \
		if (wctomb(mt, wc) >= len) \
			ERROR(50); \
	} \
	switch (len = wctomb(mb, wc)) { \
	case -1: \
		ERROR(67); \
	case 0: \
		mb++; \
		break; \
	default: \
		mb += len; \
	} \
}
- static regexp_h_inline wint_t
- regexp_h_fetchwc(const char **mb, int islp)
- {
- wchar_t wc;
- int len;
- if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
- (*mb)++;
- return WEOF;
- }
- if (islp && regexp_h_firstwc == NULL)
- regexp_h_firstwc = *mb;
- /*if (len == 0) {
- (*mb)++;
- return L'\0';
- } handled in singlebyte code */
- *mb += len;
- return wc;
- }
- #define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \
- (*(mb)++&0377): \
- regexp_h_fetchwc(&(mb), islp))
- static regexp_h_inline wint_t
- regexp_h_showwc(const char *mb)
- {
- wchar_t wc;
- if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
- return WEOF;
- return wc;
- }
- #define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
- regexp_h_showwc(mb))
- /*
- * Return the character immediately preceding mb. Since no byte is
- * required to be the first byte of a character, the longest multibyte
- * character ending at &[mb-1] is searched.
- */
- static regexp_h_inline wint_t
- regexp_h_previous(const char *mb)
- {
- const char *p = mb;
- wchar_t wc, lastwc = WEOF;
- int len, max = 0;
- if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
- return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
- while (p-- > regexp_h_bol) {
- mbtowc(NULL, NULL, 0);
- if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
- if (len < max || len < mb - p)
- break;
- max = len;
- lastwc = wc;
- } else if (len < 0 && max > 0)
- break;
- }
- return lastwc;
- }
/*
 * Test whether character c matches the multibyte bracket expression
 * stored at set. af is the value to yield when c is a member (nonzero
 * for a plain class, zero for a negated one); the opposite truth value
 * is yielded otherwise. NUL and WEOF never match. Characters up to
 * 0177 are looked up in the bitmap that starts at set+1, all others
 * are searched for by regexp_h_cclass_wc().
 */
#define regexp_h_cclass(set, c, af) \
	((c) == 0 || (c) == WEOF ? 0 : ( \
		((c) > 0177) ? \
			regexp_h_cclass_wc(set, c, af) : ( \
				REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \
			) \
		) \
	)
- static regexp_h_inline int
- regexp_h_cclass_wc(const char *set, register wint_t c, int af)
- {
- register wint_t wc, wl = WEOF;
- const char *end;
- end = &set[18] + set[0] - 1;
- set += 17;
- while (set < end) {
- wc = regexp_h_fetch(set, 0);
- #ifdef REGEXP_H_VI_BACKSLASH
- if (wc == '\\' && set < end &&
- (*set == ']' || *set == '-' ||
- *set == '^' || *set == '\\')) {
- wc = regexp_h_fetch(set, 0);
- } else
- #endif /* REGEXP_H_VI_BACKSLASH */
- if (wc == '-' && wl != WEOF && set < end) {
- wc = regexp_h_fetch(set, 0);
- #ifdef REGEXP_H_VI_BACKSLASH
- if (wc == '\\' && set < end &&
- (*set == ']' || *set == '-' ||
- *set == '^' || *set == '\\')) {
- wc = regexp_h_fetch(set, 0);
- }
- #endif /* REGEXP_H_VI_BACKSLASH */
- if (c > wl && c < wc)
- return af;
- }
- if (c == wc)
- return af;
- wl = wc;
- }
- return !af;
- }
#else /* !REGEXP_H_WCHARS */
/* Without wide character support, multibyte mode is permanently off. */
#define regexp_h_wchars 0
/* Reading a character is then just reading a byte with GETC(). */
#define regexp_h_getwc(c) { c = GETC(); }
#endif /* !REGEXP_H_WCHARS */
- regexp_h_static char *
- compile(char *instring, char *ep, const char *endbuf, int seof)
- {
- INIT /* Dependent declarations and initializations */
- register int c;
- register int eof = seof;
- char *lastep = instring;
- int cclcnt;
- char bracket[NBRA], *bracketp;
- int closed;
- char neg;
- int lc;
- int i, cflg;
- #ifdef REGEXP_H_WCHARS
- char *eq;
- regexp_h_mbcurmax = MB_CUR_MAX;
- regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0;
- #endif
- lastep = 0;
- bracketp = bracket;
- if((c = GETC()) == eof || c == '\n') {
- if (c == '\n') {
- UNGETC(c);
- nodelim = 1;
- }
- if(*ep == 0 && !sed)
- ERROR(41);
- if (bracketp > bracket)
- ERROR(42);
- RETURN(ep);
- }
- circf = closed = nbra = 0;
- if (c == '^')
- circf++;
- else
- UNGETC(c);
- for (;;) {
- if (ep >= endbuf)
- ERROR(50);
- regexp_h_getwc(c);
- if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
- lastep = ep;
- if (c == eof) {
- *ep++ = CCEOF;
- if (bracketp > bracket)
- ERROR(42);
- RETURN(ep);
- }
- switch (c) {
- case '.':
- *ep++ = CDOT|regexp_h_wchars;
- continue;
- case '\n':
- if (sed == 0) {
- UNGETC(c);
- *ep++ = CCEOF;
- nodelim = 1;
- RETURN(ep);
- }
- ERROR(36);
- case '*':
- if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
- *lastep==(CBRC|regexp_h_wchars) ||
- *lastep==(CLET|regexp_h_wchars))
- goto defchar;
- *lastep |= STAR;
- continue;
- case '$':
- if(PEEKC() != eof)
- goto defchar;
- *ep++ = CDOL;
- continue;
- case '[':
- #ifdef REGEXP_H_WCHARS
- if (regexp_h_wchars == 0) {
- #endif
- if(&ep[33] >= endbuf)
- ERROR(50);
- *ep++ = CCL;
- lc = 0;
- for(i = 0; i < 32; i++)
- ep[i] = 0;
- neg = 0;
- if((c = GETC()) == '^') {
- neg = 1;
- c = GETC();
- }
- do {
- c &= 0377;
- if(c == '\0' || c == '\n')
- ERROR(49);
- #ifdef REGEXP_H_VI_BACKSLASH
- if(c == '\\' && ((c = PEEKC()) == ']' ||
- c == '-' || c == '^' ||
- c == '\\')) {
- c = GETC();
- c &= 0377;
- } else
- #endif /* REGEXP_H_VI_BACKSLASH */
- if(c == '-' && lc != 0) {
- if ((c = GETC()) == ']') {
- PLACE('-');
- break;
- }
- #ifdef REGEXP_H_VI_BACKSLASH
- if(c == '\\' &&
- ((c = PEEKC()) == ']' ||
- c == '-' ||
- c == '^' ||
- c == '\\'))
- c = GETC();
- #endif /* REGEXP_H_VI_BACKSLASH */
- c &= 0377;
- while(lc < c) {
- PLACE(lc);
- lc++;
- }
- }
- lc = c;
- PLACE(c);
- } while((c = GETC()) != ']');
- if(neg) {
- for(cclcnt = 0; cclcnt < 32; cclcnt++)
- ep[cclcnt] ^= 0377;
- ep[0] &= 0376;
- }
- ep += 32;
- #ifdef REGEXP_H_WCHARS
- } else {
- if (&ep[18] >= endbuf)
- ERROR(50);
- *ep++ = CCL|CMB;
- *ep++ = 0;
- lc = 0;
- for (i = 0; i < 16; i++)
- ep[i] = 0;
- eq = &ep[16];
- regexp_h_getwc(c);
- if (c == L'^') {
- regexp_h_getwc(c);
- ep[-2] = CNCL|CMB;
- }
- do {
- if (c == '\0' || c == '\n')
- ERROR(49);
- #ifdef REGEXP_H_VI_BACKSLASH
- if(c == '\\' && ((c = PEEKC()) == ']' ||
- c == '-' || c == '^' ||
- c == '\\')) {
- regexp_h_store(c, eq, endbuf);
- regexp_h_getwc(c);
- } else
- #endif /* REGEXP_H_VI_BACKSLASH */
- if (c == '-' && lc != 0 && lc <= 0177) {
- regexp_h_store(c, eq, endbuf);
- regexp_h_getwc(c);
- if (c == ']') {
- PLACE('-');
- break;
- }
- #ifdef REGEXP_H_VI_BACKSLASH
- if(c == '\\' &&
- ((c = PEEKC()) == ']' ||
- c == '-' ||
- c == '^' ||
- c == '\\')) {
- regexp_h_store(c, eq,
- endbuf);
- regexp_h_getwc(c);
- }
- #endif /* REGEXP_H_VI_BACKSLASH */
- while (lc < (c & 0177)) {
- PLACE(lc);
- lc++;
- }
- }
- lc = c;
- if (c <= 0177)
- PLACE(c);
- regexp_h_store(c, eq, endbuf);
- regexp_h_getwc(c);
- } while (c != L']');
- if ((i = eq - &ep[16]) > 255)
- ERROR(50);
- lastep[1] = i;
- ep = eq;
- }
- #endif /* REGEXP_H_WCHARS */
- continue;
- case '\\':
- regexp_h_getwc(c);
- switch(c) {
- case '(':
- if(nbra >= NBRA)
- ERROR(43);
- *bracketp++ = nbra;
- *ep++ = CBRA;
- *ep++ = nbra++;
- continue;
- case ')':
- if(bracketp <= bracket)
- ERROR(42);
- *ep++ = CKET;
- *ep++ = *--bracketp;
- closed++;
- continue;
- case '<':
- *ep++ = CBRC|regexp_h_wchars;
- continue;
- case '>':
- *ep++ = CLET|regexp_h_wchars;
- continue;
- case '{':
- if(lastep == (char *) (0))
- goto defchar;
- *lastep |= RNGE;
- cflg = 0;
- nlim:
- c = GETC();
- i = 0;
- do {
- if ('0' <= c && c <= '9')
- i = 10 * i + c - '0';
- else
- ERROR(16);
- } while(((c = GETC()) != '\\') && (c != ','));
- if (i > 255)
- ERROR(11);
- *ep++ = i;
- if (c == ',') {
- if(cflg++)
- ERROR(44);
- if((c = GETC()) == '\\') {
- *ep++ = (char)255;
- *lastep |= REGEXP_H_LEAST;
- } else {
- UNGETC(c);
- goto nlim; /* get 2'nd number */
- }
- }
- if(GETC() != '}')
- ERROR(45);
- if(!cflg) /* one number */
- *ep++ = i;
- else if((ep[-1] & 0377) < (ep[-2] & 0377))
- ERROR(46);
- continue;
- case '\n':
- ERROR(36);
- case 'n':
- c = '\n';
- goto defchar;
- default:
- if(c >= '1' && c <= '9') {
- if((c -= '1') >= closed)
- ERROR(25);
- *ep++ = CBACK;
- *ep++ = c;
- continue;
- }
- }
- /* Drop through to default to use \ to turn off special chars */
- defchar:
- default:
- lastep = ep;
- #ifdef REGEXP_H_WCHARS
- if (regexp_h_wchars == 0) {
- #endif
- *ep++ = CCHR;
- *ep++ = c;
- #ifdef REGEXP_H_WCHARS
- } else {
- char mbbuf[MB_LEN_MAX];
- switch (wctomb(mbbuf, c)) {
- case 1: *ep++ = CCH1;
- break;
- case 2: *ep++ = CCH2;
- break;
- case 3: *ep++ = CCH3;
- break;
- default:
- *ep++ = CCHR|CMB;
- }
- regexp_h_store(c, ep, endbuf);
- }
- #endif /* REGEXP_H_WCHARS */
- }
- }
- }
- int
- step(const char *p1, const char *p2)
- {
- register int c;
- #ifdef REGEXP_H_WCHARS
- register int d;
- #endif /* REGEXP_H_WCHARS */
- REGEXP_H_STEP_INIT /* get circf */
- regexp_h_bol = p1;
- #ifdef REGEXP_H_WCHARS
- regexp_h_firstwc = NULL;
- #endif /* REGEXP_H_WCHARS */
- if (circf) {
- loc1 = (char *)p1;
- return(regexp_h_advance(p1, p2));
- }
- /* fast check for first character */
- if (*p2==CCHR) {
- c = p2[1] & 0377;
- do {
- if ((*p1 & 0377) != c)
- continue;
- if (regexp_h_advance(p1, p2)) {
- loc1 = (char *)p1;
- return(1);
- }
- } while (*p1++);
- return(0);
- }
- #ifdef REGEXP_H_WCHARS
- else if (*p2==CCH1) {
- do {
- if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
- loc1 = (char *)p1;
- return(1);
- }
- c = regexp_h_fetch(p1, 1);
- } while (c);
- return(0);
- } else if (*p2==CCH2) {
- do {
- if (p1[0] == p2[1] && p1[1] == p2[2] &&
- regexp_h_advance(p1, p2)) {
- loc1 = (char *)p1;
- return(1);
- }
- c = regexp_h_fetch(p1, 1);
- } while (c);
- return(0);
- } else if (*p2==CCH3) {
- do {
- if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
- regexp_h_advance(p1, p2)) {
- loc1 = (char *)p1;
- return(1);
- }
- c = regexp_h_fetch(p1, 1);
- } while (c);
- return(0);
- } else if ((*p2&0377)==(CCHR|CMB)) {
- d = regexp_h_fetch(p2, 0);
- do {
- c = regexp_h_fetch(p1, 1);
- if (c == d && regexp_h_advance(p1, p2)) {
- loc1 = (char *)p1;
- return(1);
- }
- } while(c);
- return(0);
- }
- /* regular algorithm */
- if (regexp_h_wchars)
- do {
- if (regexp_h_advance(p1, p2)) {
- loc1 = (char *)p1;
- return(1);
- }
- c = regexp_h_fetch(p1, 1);
- } while (c);
- else
- #endif /* REGEXP_H_WCHARS */
- do {
- if (regexp_h_advance(p1, p2)) {
- loc1 = (char *)p1;
- return(1);
- }
- } while (*p1++);
- return(0);
- }
#ifdef REGEXP_H_WCHARS
/*
 * It is painfully slow to read character-wise backwards in a
 * multibyte string (see regexp_h_previous() above). For the star
 * algorithm, we therefore keep track of every character as it is
 * read in forward direction.
 *
 * Don't use alloca() for stack blocks since there is no measurable
 * speedup and huge amounts of memory are used up for long input
 * lines.
 */
/* Number of positions held by one stack block; can be predefined. */
#ifndef REGEXP_H_STAKBLOK
#define REGEXP_H_STAKBLOK 1000
#endif
/* One block of the doubly linked stack of character start positions. */
struct regexp_h_stack {
	struct regexp_h_stack *s_nxt;	/* next (newer) block, or NULL */
	struct regexp_h_stack *s_prv;	/* previous (older) block, or NULL */
	const char *s_ptr[REGEXP_H_STAKBLOK];	/* recorded positions */
};
/* Record lp on the stack, but only when in multibyte mode. */
#define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? \
	regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
- static regexp_h_inline void
- regexp_h_pushwc(struct regexp_h_stack **sb,
- struct regexp_h_stack **sp,
- const char ***sc, const char *lp)
- {
- if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
- return;
- if (*sb == NULL) {
- if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
- return;
- (*sb)->s_nxt = (*sb)->s_prv = NULL;
- *sp = *sb;
- *sc = &(*sb)->s_ptr[0];
- } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
- if ((*sp)->s_nxt == NULL) {
- struct regexp_h_stack *bq;
- if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
- return;
- bq->s_nxt = NULL;
- bq->s_prv = *sp;
- (*sp)->s_nxt = bq;
- *sp = bq;
- } else
- *sp = (*sp)->s_nxt;
- *sc = &(*sp)->s_ptr[0];
- }
- *(*sc)++ = lp;
- }
- static regexp_h_inline const char *
- regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp,
- const char ***sc, const char *lp)
- {
- if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
- return &lp[-1];
- if (*sp == NULL)
- return regexp_h_firstwc;
- if (*sc == &(*sp)->s_ptr[0]) {
- if ((*sp)->s_prv == NULL) {
- regexp_h_free(*sp);
- *sp = NULL;
- *sb = NULL;
- return regexp_h_firstwc;
- }
- *sp = (*sp)->s_prv;
- regexp_h_free((*sp)->s_nxt);
- (*sp)->s_nxt = NULL ;
- *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
- }
- return *(--(*sc));
- }
- static void
- regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
- {
- for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
- if ((*sp)->s_prv)
- regexp_h_free((*sp)->s_prv);
- if (*sp) {
- if ((*sp)->s_prv)
- regexp_h_free((*sp)->s_prv);
- regexp_h_free(*sp);
- }
- *sp = *sb = NULL;
- }
#else /* !REGEXP_H_WCHARS */
/* No position stack without wide character support: expands to nothing. */
#define regexp_h_push(sb, sp, sc, lp)
#endif /* !REGEXP_H_WCHARS */
- static int
- regexp_h_advance(const char *lp, const char *ep)
- {
- register const char *curlp;
- int c, least;
- #ifdef REGEXP_H_WCHARS
- int d;
- struct regexp_h_stack *sb = NULL, *sp = NULL;
- const char **sc;
- #endif /* REGEXP_H_WCHARS */
- char *bbeg;
- int ct;
- for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) {
- case CCHR:
- #ifdef REGEXP_H_WCHARS
- case CCH1:
- #endif
- if (*ep++ == *lp++)
- continue;
- return(0);
- #ifdef REGEXP_H_WCHARS
- case CCHR|CMB:
- if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1))
- continue;
- return(0);
- case CCH2:
- if (ep[0] == lp[0] && ep[1] == lp[1]) {
- ep += 2, lp += 2;
- continue;
- }
- return(0);
- case CCH3:
- if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) {
- ep += 3, lp += 3;
- continue;
- }
- return(0);
- #endif /* REGEXP_H_WCHARS */
- case CDOT:
- if (*lp++)
- continue;
- return(0);
- #ifdef REGEXP_H_WCHARS
- case CDOT|CMB:
- if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF)
- continue;
- return(0);
- #endif /* REGEXP_H_WCHARS */
- case CDOL:
- if (*lp==0)
- continue;
- return(0);
- case CCEOF:
- loc2 = (char *)lp;
- return(1);
- case CCL:
- c = *lp++ & 0377;
- if(ISTHERE(c)) {
- ep += 32;
- continue;
- }
- return(0);
- #ifdef REGEXP_H_WCHARS
- case CCL|CMB:
- case CNCL|CMB:
- c = regexp_h_fetch(lp, 1);
- if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) {
- ep += (*ep & 0377) + 17;
- continue;
- }
- return 0;
- #endif /* REGEXP_H_WCHARS */
- case CBRA:
- braslist[*ep++ & 0377] = (char *)lp;
- continue;
- case CKET:
- braelist[*ep++ & 0377] = (char *)lp;
- continue;
- case CBRC:
- if (lp == regexp_h_bol && locs == NULL)
- continue;
- if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377))
- && !regexp_h_uletter(lp[-1] & 0377)
- && !isdigit(lp[-1] & 0377))
- continue;
- return(0);
- #ifdef REGEXP_H_WCHARS
- case CBRC|CMB:
- c = regexp_h_show(lp);
- d = regexp_h_previous(lp);
- if ((iswdigit(c) || regexp_h_wuletter(c))
- && !regexp_h_wuletter(d)
- && !iswdigit(d))
- continue;
- return(0);
- #endif /* REGEXP_H_WCHARS */
- case CLET:
- if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377))
- continue;
- return(0);
- #ifdef REGEXP_H_WCHARS
- case CLET|CMB:
- c = regexp_h_show(lp);
- if (!regexp_h_wuletter(c) && !iswdigit(c))
- continue;
- return(0);
- #endif /* REGEXP_H_WCHARS */
- case CCHR|RNGE:
- c = *ep++;
- regexp_h_getrnge(ep, least);
- while(low--)
- if(*lp++ != c)
- return(0);
- curlp = lp;
- while(size--) {
- regexp_h_push(&sb, &sp, &sc, lp);
- if(*lp++ != c)
- break;
- }
- if(size < 0) {
- regexp_h_push(&sb, &sp, &sc, lp);
- lp++;
- }
- ep += 2;
- goto star;
- #ifdef REGEXP_H_WCHARS
- case CCHR|RNGE|CMB:
- case CCH1|RNGE:
- case CCH2|RNGE:
- case CCH3|RNGE:
- c = regexp_h_fetch(ep, 0);
- regexp_h_getrnge(ep, least);
- while (low--)
- if (regexp_h_fetch(lp, 1) != c)
- return 0;
- curlp = lp;
- while (size--) {
- regexp_h_push(&sb, &sp, &sc, lp);
- if (regexp_h_fetch(lp, 1) != c)
- break;
- }
- if(size < 0) {
- regexp_h_push(&sb, &sp, &sc, lp);
- regexp_h_fetch(lp, 1);
- }
- ep += 2;
- goto star;
- #endif /* REGEXP_H_WCHARS */
- case CDOT|RNGE:
- regexp_h_getrnge(ep, least);
- while(low--)
- if(*lp++ == '\0')
- return(0);
- curlp = lp;
- while(size--) {
- regexp_h_push(&sb, &sp, &sc, lp);
- if(*lp++ == '\0')
- break;
- }
- if(size < 0) {
- regexp_h_push(&sb, &sp, &sc, lp);
- lp++;
- }
- ep += 2;
- goto star;
- #ifdef REGEXP_H_WCHARS
- case CDOT|RNGE|CMB:
- regexp_h_getrnge(ep, least);
- while (low--)
- if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
- return 0;
- curlp = lp;
- while (size--) {
- regexp_h_push(&sb, &sp, &sc, lp);
- if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
- break;
- }
- if (size < 0) {
- regexp_h_push(&sb, &sp, &sc, lp);
- regexp_h_fetch(lp, 1);
- }
- ep += 2;
- goto star;
- #endif /* REGEXP_H_WCHARS */
- case CCL|RNGE:
- regexp_h_getrnge(ep + 32, least);
- while(low--) {
- c = *lp++ & 0377;
- if(!ISTHERE(c))
- return(0);
- }
- curlp = lp;
- while(size--) {
- regexp_h_push(&sb, &sp, &sc, lp);
- c = *lp++ & 0377;
- if(!ISTHERE(c))
- break;
- }
- if(size < 0) {
- regexp_h_push(&sb, &sp, &sc, lp);
- lp++;
- }
- ep += 34; /* 32 + 2 */
- goto star;
- #ifdef REGEXP_H_WCHARS
- case CCL|RNGE|CMB:
- case CNCL|RNGE|CMB:
- regexp_h_getrnge(ep + (*ep & 0377) + 17, least);
- while (low--) {
- c = regexp_h_fetch(lp, 1);
- if (!regexp_h_cclass(ep, c,
- (ep[-1] & 0377 & ~REGEXP_H_LEAST)
- == (CCL|RNGE|CMB)))
- return 0;
- }
- curlp = lp;
- while (size--) {
- regexp_h_push(&sb, &sp, &sc, lp);
- c = regexp_h_fetch(lp, 1);
- if (!regexp_h_cclass(ep, c,
- (ep[-1] & 0377 & ~REGEXP_H_LEAST)
- == (CCL|RNGE|CMB)))
- break;
- }
- if (size < 0) {
- regexp_h_push(&sb, &sp, &sc, lp);
- regexp_h_fetch(lp, 1);
- }
- ep += (*ep & 0377) + 19;
- goto star;
- #endif /* REGEXP_H_WCHARS */
- case CBACK:
- bbeg = braslist[*ep & 0377];
- ct = braelist[*ep++ & 0377] - bbeg;
- if(strncmp(bbeg, lp, ct) == 0) {
- lp += ct;
- continue;
- }
- return(0);
- case CBACK|STAR:
- bbeg = braslist[*ep & 0377];
- ct = braelist[*ep++ & 0377] - bbeg;
- curlp = lp;
- while(strncmp(bbeg, lp, ct) == 0)
- lp += ct;
- while(lp >= curlp) {
- if(regexp_h_advance(lp, ep)) return(1);
- lp -= ct;
- }
- return(0);
- case CDOT|STAR:
- curlp = lp;
- do
- regexp_h_push(&sb, &sp, &sc, lp);
- while (*lp++);
- goto star;
- #ifdef REGEXP_H_WCHARS
- case CDOT|STAR|CMB:
- curlp = lp;
- do
- regexp_h_push(&sb, &sp, &sc, lp);
- while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF);
- goto star;
- #endif /* REGEXP_H_WCHARS */
- case CCHR|STAR:
- curlp = lp;
- do
- regexp_h_push(&sb, &sp, &sc, lp);
- while (*lp++ == *ep);
- ep++;
- goto star;
- #ifdef REGEXP_H_WCHARS
- case CCHR|STAR|CMB:
- case CCH1|STAR:
- case CCH2|STAR:
- case CCH3|STAR:
- curlp = lp;
- d = regexp_h_fetch(ep, 0);
- do
- regexp_h_push(&sb, &sp, &sc, lp);
- while (regexp_h_fetch(lp, 1) == d);
- goto star;
- #endif /* REGEXP_H_WCHARS */
- case CCL|STAR:
- curlp = lp;
- do {
- regexp_h_push(&sb, &sp, &sc, lp);
- c = *lp++ & 0377;
- } while(ISTHERE(c));
- ep += 32;
- goto star;
- #ifdef REGEXP_H_WCHARS
- case CCL|STAR|CMB:
- case CNCL|STAR|CMB:
- curlp = lp;
- do {
- regexp_h_push(&sb, &sp, &sc, lp);
- c = regexp_h_fetch(lp, 1);
- } while (regexp_h_cclass(ep, c, (ep[-1] & 0377)
- == (CCL|STAR|CMB)));
- ep += (*ep & 0377) + 17;
- goto star;
- #endif /* REGEXP_H_WCHARS */
- star:
- #ifdef REGEXP_H_WCHARS
- if (regexp_h_wchars == 0) {
- #endif
- do {
- if(--lp == locs)
- break;
- if (regexp_h_advance(lp, ep))
- return(1);
- } while (lp > curlp);
- #ifdef REGEXP_H_WCHARS
- } else {
- do {
- lp = regexp_h_pop(&sb, &sp, &sc, lp);
- if (lp <= locs)
- break;
- if (regexp_h_advance(lp, ep)) {
- regexp_h_zerostak(&sb, &sp);
- return(1);
- }
- } while (lp > curlp);
- regexp_h_zerostak(&sb, &sp);
- }
- #endif /* REGEXP_H_WCHARS */
- return(0);
- }
- }
- static void
- regexp_h_getrnge(register const char *str, int least)
- {
- low = *str++ & 0377;
- size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
- }
- int
- advance(const char *lp, const char *ep)
- {
- REGEXP_H_ADVANCE_INIT /* skip past circf */
- regexp_h_bol = lp;
- #ifdef REGEXP_H_WCHARS
- regexp_h_firstwc = NULL;
- #endif /* REGEXP_H_WCHARS */
- return regexp_h_advance(lp, ep);
- }
|