regexp.h 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211
  1. /*
  2. * Simple Regular Expression functions. Derived from Unix 7th Edition,
  3. * /usr/src/cmd/expr.y
  4. *
  5. * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002.
  6. *
  7. * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions
  11. * are met:
  12. * Redistributions of source code and documentation must retain the
  13. * above copyright notice, this list of conditions and the following
  14. * disclaimer.
  15. * Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. * All advertising materials mentioning features or use of this software
  19. * must display the following acknowledgement:
  20. * This product includes software developed or owned by Caldera
  21. * International, Inc.
  22. * Neither the name of Caldera International, Inc. nor the names of
  23. * other contributors may be used to endorse or promote products
  24. * derived from this software without specific prior written permission.
  25. *
  26. * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
  27. * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
  28. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  29. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30. * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
  31. * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  34. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  35. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  36. * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  37. * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. */
  39. #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
  40. #define REGEXP_H_USED __attribute__ ((used))
  41. #elif defined __GNUC__
  42. #define REGEXP_H_USED __attribute__ ((unused))
  43. #else
  44. #define REGEXP_H_USED
  45. #endif
  46. static const char regexp_h_sccsid[] REGEXP_H_USED =
  47. "@(#)regexp.sl 1.56 (gritter) 5/29/05";
  48. #if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
  49. #define REGEXP_H_WCHARS
  50. #endif
  51. #define CBRA 2
  52. #define CCHR 4
  53. #define CDOT 8
  54. #define CCL 12
  55. /* CLNUM 14 used in sed */
  56. /* CEND 16 used in sed */
  57. #define CDOL 20
  58. #define CCEOF 22
  59. #define CKET 24
  60. #define CBACK 36
  61. #define CNCL 40
  62. #define CBRC 44
  63. #define CLET 48
  64. #define CCH1 52
  65. #define CCH2 56
  66. #define CCH3 60
  67. #define STAR 01
  68. #define RNGE 03
  69. #define REGEXP_H_LEAST 0100
  70. #ifdef REGEXP_H_WCHARS
  71. #define CMB 0200
  72. #else /* !REGEXP_H_WCHARS */
  73. #define CMB 0
  74. #endif /* !REGEXP_H_WCHARS */
  75. #define NBRA 9
  76. #define PLACE(c) ep[c >> 3] |= bittab[c & 07]
  77. #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07])
  78. #ifdef REGEXP_H_WCHARS
  79. #define REGEXP_H_IS_THERE(ep, c) ((ep)[c >> 3] & bittab[c & 07])
  80. #endif
  81. #include <ctype.h>
  82. #include <string.h>
  83. #include <limits.h>
  84. #ifdef REGEXP_H_WCHARS
  85. #include <stdlib.h>
  86. #include <wchar.h>
  87. #include <wctype.h>
  88. #endif /* REGEXP_H_WCHARS */
  89. #define regexp_h_uletter(c) (isalpha(c) || (c) == '_')
  90. #ifdef REGEXP_H_WCHARS
  91. #define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_')
  92. /*
  93. * Used to allocate memory for the multibyte star algorithm.
  94. */
  95. #ifndef regexp_h_malloc
  96. #define regexp_h_malloc(n) malloc(n)
  97. #endif
  98. #ifndef regexp_h_free
  99. #define regexp_h_free(p) free(p)
  100. #endif
  101. /*
  102. * Can be predefined to 'inline' to inline some multibyte functions;
  103. * may improve performance for files that contain many multibyte
  104. * sequences.
  105. */
  106. #ifndef regexp_h_inline
  107. #define regexp_h_inline
  108. #endif
  109. /*
  110. * Mask to determine whether the first byte of a sequence possibly
  111. * starts a multibyte character. Set to 0377 to force mbtowc() for
  112. * any byte sequence (except 0).
  113. */
  114. #ifndef REGEXP_H_MASK
  115. #define REGEXP_H_MASK 0200
  116. #endif
  117. #endif /* REGEXP_H_WCHARS */
  118. /*
  119. * For regexpr.h.
  120. */
  121. #ifndef regexp_h_static
  122. #define regexp_h_static
  123. #endif
  124. #ifndef REGEXP_H_STEP_INIT
  125. #define REGEXP_H_STEP_INIT
  126. #endif
  127. #ifndef REGEXP_H_ADVANCE_INIT
  128. #define REGEXP_H_ADVANCE_INIT
  129. #endif
  130. char *braslist[NBRA];
  131. char *braelist[NBRA];
  132. int nbra;
  133. char *loc1, *loc2, *locs;
  134. int sed;
  135. int nodelim;
  136. regexp_h_static int circf;
  137. regexp_h_static int low;
  138. regexp_h_static int size;
  139. regexp_h_static unsigned char bittab[] = {
  140. 1,
  141. 2,
  142. 4,
  143. 8,
  144. 16,
  145. 32,
  146. 64,
  147. 128
  148. };
  149. static int regexp_h_advance(register const char *lp,
  150. register const char *ep);
  151. static void regexp_h_getrnge(register const char *str, int least);
  152. static const char *regexp_h_bol; /* beginning of input line (for \<) */
  153. #ifdef REGEXP_H_WCHARS
  154. static int regexp_h_wchars;
  155. static int regexp_h_mbcurmax;
  156. static const char *regexp_h_firstwc; /* location of first
  157. multibyte character
  158. on input line */
  159. #define regexp_h_getwc(c) { \
  160. if (regexp_h_wchars) { \
  161. char mbbuf[MB_LEN_MAX + 1], *mbptr; \
  162. wchar_t wcbuf; \
  163. int mb, len; \
  164. mbptr = mbbuf; \
  165. do { \
  166. mb = GETC(); \
  167. *mbptr++ = mb; \
  168. *mbptr = '\0'; \
  169. } while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
  170. && mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
  171. if (len == -1) \
  172. ERROR(67); \
  173. c = wcbuf; \
  174. } else { \
  175. c = GETC(); \
  176. } \
  177. }
  178. #define regexp_h_store(wc, mb, me) { \
  179. int len; \
  180. if (wc == WEOF) \
  181. ERROR(67); \
  182. if ((len = me - mb) <= regexp_h_mbcurmax) { \
  183. char mt[MB_LEN_MAX]; \
  184. if (wctomb(mt, wc) >= len) \
  185. ERROR(50); \
  186. } \
  187. switch (len = wctomb(mb, wc)) { \
  188. case -1: \
  189. ERROR(67); \
  190. case 0: \
  191. mb++; \
  192. break; \
  193. default: \
  194. mb += len; \
  195. } \
  196. }
  197. static regexp_h_inline wint_t
  198. regexp_h_fetchwc(const char **mb, int islp)
  199. {
  200. wchar_t wc;
  201. int len;
  202. if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
  203. (*mb)++;
  204. return WEOF;
  205. }
  206. if (islp && regexp_h_firstwc == NULL)
  207. regexp_h_firstwc = *mb;
  208. /*if (len == 0) {
  209. (*mb)++;
  210. return L'\0';
  211. } handled in singlebyte code */
  212. *mb += len;
  213. return wc;
  214. }
  215. #define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \
  216. (*(mb)++&0377): \
  217. regexp_h_fetchwc(&(mb), islp))
  218. static regexp_h_inline wint_t
  219. regexp_h_showwc(const char *mb)
  220. {
  221. wchar_t wc;
  222. if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
  223. return WEOF;
  224. return wc;
  225. }
  226. #define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
  227. regexp_h_showwc(mb))
  228. /*
  229. * Return the character immediately preceding mb. Since no byte is
  230. * required to be the first byte of a character, the longest multibyte
  231. * character ending at &[mb-1] is searched.
  232. */
  233. static regexp_h_inline wint_t
  234. regexp_h_previous(const char *mb)
  235. {
  236. const char *p = mb;
  237. wchar_t wc, lastwc = WEOF;
  238. int len, max = 0;
  239. if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
  240. return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
  241. while (p-- > regexp_h_bol) {
  242. mbtowc(NULL, NULL, 0);
  243. if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
  244. if (len < max || len < mb - p)
  245. break;
  246. max = len;
  247. lastwc = wc;
  248. } else if (len < 0 && max > 0)
  249. break;
  250. }
  251. return lastwc;
  252. }
  253. #define regexp_h_cclass(set, c, af) \
  254. ((c) == 0 || (c) == WEOF ? 0 : ( \
  255. ((c) > 0177) ? \
  256. regexp_h_cclass_wc(set, c, af) : ( \
  257. REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \
  258. ) \
  259. ) \
  260. )
  261. static regexp_h_inline int
  262. regexp_h_cclass_wc(const char *set, register wint_t c, int af)
  263. {
  264. register wint_t wc, wl = WEOF;
  265. const char *end;
  266. end = &set[18] + set[0] - 1;
  267. set += 17;
  268. while (set < end) {
  269. wc = regexp_h_fetch(set, 0);
  270. #ifdef REGEXP_H_VI_BACKSLASH
  271. if (wc == '\\' && set < end &&
  272. (*set == ']' || *set == '-' ||
  273. *set == '^' || *set == '\\')) {
  274. wc = regexp_h_fetch(set, 0);
  275. } else
  276. #endif /* REGEXP_H_VI_BACKSLASH */
  277. if (wc == '-' && wl != WEOF && set < end) {
  278. wc = regexp_h_fetch(set, 0);
  279. #ifdef REGEXP_H_VI_BACKSLASH
  280. if (wc == '\\' && set < end &&
  281. (*set == ']' || *set == '-' ||
  282. *set == '^' || *set == '\\')) {
  283. wc = regexp_h_fetch(set, 0);
  284. }
  285. #endif /* REGEXP_H_VI_BACKSLASH */
  286. if (c > wl && c < wc)
  287. return af;
  288. }
  289. if (c == wc)
  290. return af;
  291. wl = wc;
  292. }
  293. return !af;
  294. }
  295. #else /* !REGEXP_H_WCHARS */
  296. #define regexp_h_wchars 0
  297. #define regexp_h_getwc(c) { c = GETC(); }
  298. #endif /* !REGEXP_H_WCHARS */
  299. regexp_h_static char *
  300. compile(char *instring, char *ep, const char *endbuf, int seof)
  301. {
  302. INIT /* Dependent declarations and initializations */
  303. register int c;
  304. register int eof = seof;
  305. char *lastep = instring;
  306. int cclcnt;
  307. char bracket[NBRA], *bracketp;
  308. int closed;
  309. char neg;
  310. int lc;
  311. int i, cflg;
  312. #ifdef REGEXP_H_WCHARS
  313. char *eq;
  314. regexp_h_mbcurmax = MB_CUR_MAX;
  315. regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0;
  316. #endif
  317. lastep = 0;
  318. bracketp = bracket;
  319. if((c = GETC()) == eof || c == '\n') {
  320. if (c == '\n') {
  321. UNGETC(c);
  322. nodelim = 1;
  323. }
  324. if(*ep == 0 && !sed)
  325. ERROR(41);
  326. if (bracketp > bracket)
  327. ERROR(42);
  328. RETURN(ep);
  329. }
  330. circf = closed = nbra = 0;
  331. if (c == '^')
  332. circf++;
  333. else
  334. UNGETC(c);
  335. for (;;) {
  336. if (ep >= endbuf)
  337. ERROR(50);
  338. regexp_h_getwc(c);
  339. if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
  340. lastep = ep;
  341. if (c == eof) {
  342. *ep++ = CCEOF;
  343. if (bracketp > bracket)
  344. ERROR(42);
  345. RETURN(ep);
  346. }
  347. switch (c) {
  348. case '.':
  349. *ep++ = CDOT|regexp_h_wchars;
  350. continue;
  351. case '\n':
  352. if (sed == 0) {
  353. UNGETC(c);
  354. *ep++ = CCEOF;
  355. nodelim = 1;
  356. RETURN(ep);
  357. }
  358. ERROR(36);
  359. case '*':
  360. if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
  361. *lastep==(CBRC|regexp_h_wchars) ||
  362. *lastep==(CLET|regexp_h_wchars))
  363. goto defchar;
  364. *lastep |= STAR;
  365. continue;
  366. case '$':
  367. if(PEEKC() != eof)
  368. goto defchar;
  369. *ep++ = CDOL;
  370. continue;
  371. case '[':
  372. #ifdef REGEXP_H_WCHARS
  373. if (regexp_h_wchars == 0) {
  374. #endif
  375. if(&ep[33] >= endbuf)
  376. ERROR(50);
  377. *ep++ = CCL;
  378. lc = 0;
  379. for(i = 0; i < 32; i++)
  380. ep[i] = 0;
  381. neg = 0;
  382. if((c = GETC()) == '^') {
  383. neg = 1;
  384. c = GETC();
  385. }
  386. do {
  387. c &= 0377;
  388. if(c == '\0' || c == '\n')
  389. ERROR(49);
  390. #ifdef REGEXP_H_VI_BACKSLASH
  391. if(c == '\\' && ((c = PEEKC()) == ']' ||
  392. c == '-' || c == '^' ||
  393. c == '\\')) {
  394. c = GETC();
  395. c &= 0377;
  396. } else
  397. #endif /* REGEXP_H_VI_BACKSLASH */
  398. if(c == '-' && lc != 0) {
  399. if ((c = GETC()) == ']') {
  400. PLACE('-');
  401. break;
  402. }
  403. #ifdef REGEXP_H_VI_BACKSLASH
  404. if(c == '\\' &&
  405. ((c = PEEKC()) == ']' ||
  406. c == '-' ||
  407. c == '^' ||
  408. c == '\\'))
  409. c = GETC();
  410. #endif /* REGEXP_H_VI_BACKSLASH */
  411. c &= 0377;
  412. while(lc < c) {
  413. PLACE(lc);
  414. lc++;
  415. }
  416. }
  417. lc = c;
  418. PLACE(c);
  419. } while((c = GETC()) != ']');
  420. if(neg) {
  421. for(cclcnt = 0; cclcnt < 32; cclcnt++)
  422. ep[cclcnt] ^= 0377;
  423. ep[0] &= 0376;
  424. }
  425. ep += 32;
  426. #ifdef REGEXP_H_WCHARS
  427. } else {
  428. if (&ep[18] >= endbuf)
  429. ERROR(50);
  430. *ep++ = CCL|CMB;
  431. *ep++ = 0;
  432. lc = 0;
  433. for (i = 0; i < 16; i++)
  434. ep[i] = 0;
  435. eq = &ep[16];
  436. regexp_h_getwc(c);
  437. if (c == L'^') {
  438. regexp_h_getwc(c);
  439. ep[-2] = CNCL|CMB;
  440. }
  441. do {
  442. if (c == '\0' || c == '\n')
  443. ERROR(49);
  444. #ifdef REGEXP_H_VI_BACKSLASH
  445. if(c == '\\' && ((c = PEEKC()) == ']' ||
  446. c == '-' || c == '^' ||
  447. c == '\\')) {
  448. regexp_h_store(c, eq, endbuf);
  449. regexp_h_getwc(c);
  450. } else
  451. #endif /* REGEXP_H_VI_BACKSLASH */
  452. if (c == '-' && lc != 0 && lc <= 0177) {
  453. regexp_h_store(c, eq, endbuf);
  454. regexp_h_getwc(c);
  455. if (c == ']') {
  456. PLACE('-');
  457. break;
  458. }
  459. #ifdef REGEXP_H_VI_BACKSLASH
  460. if(c == '\\' &&
  461. ((c = PEEKC()) == ']' ||
  462. c == '-' ||
  463. c == '^' ||
  464. c == '\\')) {
  465. regexp_h_store(c, eq,
  466. endbuf);
  467. regexp_h_getwc(c);
  468. }
  469. #endif /* REGEXP_H_VI_BACKSLASH */
  470. while (lc < (c & 0177)) {
  471. PLACE(lc);
  472. lc++;
  473. }
  474. }
  475. lc = c;
  476. if (c <= 0177)
  477. PLACE(c);
  478. regexp_h_store(c, eq, endbuf);
  479. regexp_h_getwc(c);
  480. } while (c != L']');
  481. if ((i = eq - &ep[16]) > 255)
  482. ERROR(50);
  483. lastep[1] = i;
  484. ep = eq;
  485. }
  486. #endif /* REGEXP_H_WCHARS */
  487. continue;
  488. case '\\':
  489. regexp_h_getwc(c);
  490. switch(c) {
  491. case '(':
  492. if(nbra >= NBRA)
  493. ERROR(43);
  494. *bracketp++ = nbra;
  495. *ep++ = CBRA;
  496. *ep++ = nbra++;
  497. continue;
  498. case ')':
  499. if(bracketp <= bracket)
  500. ERROR(42);
  501. *ep++ = CKET;
  502. *ep++ = *--bracketp;
  503. closed++;
  504. continue;
  505. case '<':
  506. *ep++ = CBRC|regexp_h_wchars;
  507. continue;
  508. case '>':
  509. *ep++ = CLET|regexp_h_wchars;
  510. continue;
  511. case '{':
  512. if(lastep == (char *) (0))
  513. goto defchar;
  514. *lastep |= RNGE;
  515. cflg = 0;
  516. nlim:
  517. c = GETC();
  518. i = 0;
  519. do {
  520. if ('0' <= c && c <= '9')
  521. i = 10 * i + c - '0';
  522. else
  523. ERROR(16);
  524. } while(((c = GETC()) != '\\') && (c != ','));
  525. if (i > 255)
  526. ERROR(11);
  527. *ep++ = i;
  528. if (c == ',') {
  529. if(cflg++)
  530. ERROR(44);
  531. if((c = GETC()) == '\\') {
  532. *ep++ = (char)255;
  533. *lastep |= REGEXP_H_LEAST;
  534. } else {
  535. UNGETC(c);
  536. goto nlim; /* get 2'nd number */
  537. }
  538. }
  539. if(GETC() != '}')
  540. ERROR(45);
  541. if(!cflg) /* one number */
  542. *ep++ = i;
  543. else if((ep[-1] & 0377) < (ep[-2] & 0377))
  544. ERROR(46);
  545. continue;
  546. case '\n':
  547. ERROR(36);
  548. case 'n':
  549. c = '\n';
  550. goto defchar;
  551. default:
  552. if(c >= '1' && c <= '9') {
  553. if((c -= '1') >= closed)
  554. ERROR(25);
  555. *ep++ = CBACK;
  556. *ep++ = c;
  557. continue;
  558. }
  559. }
  560. /* Drop through to default to use \ to turn off special chars */
  561. defchar:
  562. default:
  563. lastep = ep;
  564. #ifdef REGEXP_H_WCHARS
  565. if (regexp_h_wchars == 0) {
  566. #endif
  567. *ep++ = CCHR;
  568. *ep++ = c;
  569. #ifdef REGEXP_H_WCHARS
  570. } else {
  571. char mbbuf[MB_LEN_MAX];
  572. switch (wctomb(mbbuf, c)) {
  573. case 1: *ep++ = CCH1;
  574. break;
  575. case 2: *ep++ = CCH2;
  576. break;
  577. case 3: *ep++ = CCH3;
  578. break;
  579. default:
  580. *ep++ = CCHR|CMB;
  581. }
  582. regexp_h_store(c, ep, endbuf);
  583. }
  584. #endif /* REGEXP_H_WCHARS */
  585. }
  586. }
  587. }
  588. int
  589. step(const char *p1, const char *p2)
  590. {
  591. register int c;
  592. #ifdef REGEXP_H_WCHARS
  593. register int d;
  594. #endif /* REGEXP_H_WCHARS */
  595. REGEXP_H_STEP_INIT /* get circf */
  596. regexp_h_bol = p1;
  597. #ifdef REGEXP_H_WCHARS
  598. regexp_h_firstwc = NULL;
  599. #endif /* REGEXP_H_WCHARS */
  600. if (circf) {
  601. loc1 = (char *)p1;
  602. return(regexp_h_advance(p1, p2));
  603. }
  604. /* fast check for first character */
  605. if (*p2==CCHR) {
  606. c = p2[1] & 0377;
  607. do {
  608. if ((*p1 & 0377) != c)
  609. continue;
  610. if (regexp_h_advance(p1, p2)) {
  611. loc1 = (char *)p1;
  612. return(1);
  613. }
  614. } while (*p1++);
  615. return(0);
  616. }
  617. #ifdef REGEXP_H_WCHARS
  618. else if (*p2==CCH1) {
  619. do {
  620. if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
  621. loc1 = (char *)p1;
  622. return(1);
  623. }
  624. c = regexp_h_fetch(p1, 1);
  625. } while (c);
  626. return(0);
  627. } else if (*p2==CCH2) {
  628. do {
  629. if (p1[0] == p2[1] && p1[1] == p2[2] &&
  630. regexp_h_advance(p1, p2)) {
  631. loc1 = (char *)p1;
  632. return(1);
  633. }
  634. c = regexp_h_fetch(p1, 1);
  635. } while (c);
  636. return(0);
  637. } else if (*p2==CCH3) {
  638. do {
  639. if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
  640. regexp_h_advance(p1, p2)) {
  641. loc1 = (char *)p1;
  642. return(1);
  643. }
  644. c = regexp_h_fetch(p1, 1);
  645. } while (c);
  646. return(0);
  647. } else if ((*p2&0377)==(CCHR|CMB)) {
  648. d = regexp_h_fetch(p2, 0);
  649. do {
  650. c = regexp_h_fetch(p1, 1);
  651. if (c == d && regexp_h_advance(p1, p2)) {
  652. loc1 = (char *)p1;
  653. return(1);
  654. }
  655. } while(c);
  656. return(0);
  657. }
  658. /* regular algorithm */
  659. if (regexp_h_wchars)
  660. do {
  661. if (regexp_h_advance(p1, p2)) {
  662. loc1 = (char *)p1;
  663. return(1);
  664. }
  665. c = regexp_h_fetch(p1, 1);
  666. } while (c);
  667. else
  668. #endif /* REGEXP_H_WCHARS */
  669. do {
  670. if (regexp_h_advance(p1, p2)) {
  671. loc1 = (char *)p1;
  672. return(1);
  673. }
  674. } while (*p1++);
  675. return(0);
  676. }
  677. #ifdef REGEXP_H_WCHARS
  678. /*
  679. * It is painfully slow to read character-wise backwards in a
  680. * multibyte string (see regexp_h_previous() above). For the star
  681. * algorithm, we therefore keep track of every character as it is
  682. * read in forward direction.
  683. *
  684. * Don't use alloca() for stack blocks since there is no measurable
  685. * speedup and huge amounts of memory are used up for long input
  686. * lines.
  687. */
  688. #ifndef REGEXP_H_STAKBLOK
  689. #define REGEXP_H_STAKBLOK 1000
  690. #endif
  691. struct regexp_h_stack {
  692. struct regexp_h_stack *s_nxt;
  693. struct regexp_h_stack *s_prv;
  694. const char *s_ptr[REGEXP_H_STAKBLOK];
  695. };
  696. #define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? \
  697. regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
  698. static regexp_h_inline void
  699. regexp_h_pushwc(struct regexp_h_stack **sb,
  700. struct regexp_h_stack **sp,
  701. const char ***sc, const char *lp)
  702. {
  703. if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
  704. return;
  705. if (*sb == NULL) {
  706. if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
  707. return;
  708. (*sb)->s_nxt = (*sb)->s_prv = NULL;
  709. *sp = *sb;
  710. *sc = &(*sb)->s_ptr[0];
  711. } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
  712. if ((*sp)->s_nxt == NULL) {
  713. struct regexp_h_stack *bq;
  714. if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
  715. return;
  716. bq->s_nxt = NULL;
  717. bq->s_prv = *sp;
  718. (*sp)->s_nxt = bq;
  719. *sp = bq;
  720. } else
  721. *sp = (*sp)->s_nxt;
  722. *sc = &(*sp)->s_ptr[0];
  723. }
  724. *(*sc)++ = lp;
  725. }
  726. static regexp_h_inline const char *
  727. regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp,
  728. const char ***sc, const char *lp)
  729. {
  730. if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
  731. return &lp[-1];
  732. if (*sp == NULL)
  733. return regexp_h_firstwc;
  734. if (*sc == &(*sp)->s_ptr[0]) {
  735. if ((*sp)->s_prv == NULL) {
  736. regexp_h_free(*sp);
  737. *sp = NULL;
  738. *sb = NULL;
  739. return regexp_h_firstwc;
  740. }
  741. *sp = (*sp)->s_prv;
  742. regexp_h_free((*sp)->s_nxt);
  743. (*sp)->s_nxt = NULL ;
  744. *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
  745. }
  746. return *(--(*sc));
  747. }
  748. static void
  749. regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
  750. {
  751. for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
  752. if ((*sp)->s_prv)
  753. regexp_h_free((*sp)->s_prv);
  754. if (*sp) {
  755. if ((*sp)->s_prv)
  756. regexp_h_free((*sp)->s_prv);
  757. regexp_h_free(*sp);
  758. }
  759. *sp = *sb = NULL;
  760. }
  761. #else /* !REGEXP_H_WCHARS */
  762. #define regexp_h_push(sb, sp, sc, lp)
  763. #endif /* !REGEXP_H_WCHARS */
  764. static int
  765. regexp_h_advance(const char *lp, const char *ep)
  766. {
  767. register const char *curlp;
  768. int c, least;
  769. #ifdef REGEXP_H_WCHARS
  770. int d;
  771. struct regexp_h_stack *sb = NULL, *sp = NULL;
  772. const char **sc;
  773. #endif /* REGEXP_H_WCHARS */
  774. char *bbeg;
  775. int ct;
  776. for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) {
  777. case CCHR:
  778. #ifdef REGEXP_H_WCHARS
  779. case CCH1:
  780. #endif
  781. if (*ep++ == *lp++)
  782. continue;
  783. return(0);
  784. #ifdef REGEXP_H_WCHARS
  785. case CCHR|CMB:
  786. if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1))
  787. continue;
  788. return(0);
  789. case CCH2:
  790. if (ep[0] == lp[0] && ep[1] == lp[1]) {
  791. ep += 2, lp += 2;
  792. continue;
  793. }
  794. return(0);
  795. case CCH3:
  796. if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) {
  797. ep += 3, lp += 3;
  798. continue;
  799. }
  800. return(0);
  801. #endif /* REGEXP_H_WCHARS */
  802. case CDOT:
  803. if (*lp++)
  804. continue;
  805. return(0);
  806. #ifdef REGEXP_H_WCHARS
  807. case CDOT|CMB:
  808. if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF)
  809. continue;
  810. return(0);
  811. #endif /* REGEXP_H_WCHARS */
  812. case CDOL:
  813. if (*lp==0)
  814. continue;
  815. return(0);
  816. case CCEOF:
  817. loc2 = (char *)lp;
  818. return(1);
  819. case CCL:
  820. c = *lp++ & 0377;
  821. if(ISTHERE(c)) {
  822. ep += 32;
  823. continue;
  824. }
  825. return(0);
  826. #ifdef REGEXP_H_WCHARS
  827. case CCL|CMB:
  828. case CNCL|CMB:
  829. c = regexp_h_fetch(lp, 1);
  830. if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) {
  831. ep += (*ep & 0377) + 17;
  832. continue;
  833. }
  834. return 0;
  835. #endif /* REGEXP_H_WCHARS */
  836. case CBRA:
  837. braslist[*ep++ & 0377] = (char *)lp;
  838. continue;
  839. case CKET:
  840. braelist[*ep++ & 0377] = (char *)lp;
  841. continue;
  842. case CBRC:
  843. if (lp == regexp_h_bol && locs == NULL)
  844. continue;
  845. if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377))
  846. && !regexp_h_uletter(lp[-1] & 0377)
  847. && !isdigit(lp[-1] & 0377))
  848. continue;
  849. return(0);
  850. #ifdef REGEXP_H_WCHARS
  851. case CBRC|CMB:
  852. c = regexp_h_show(lp);
  853. d = regexp_h_previous(lp);
  854. if ((iswdigit(c) || regexp_h_wuletter(c))
  855. && !regexp_h_wuletter(d)
  856. && !iswdigit(d))
  857. continue;
  858. return(0);
  859. #endif /* REGEXP_H_WCHARS */
  860. case CLET:
  861. if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377))
  862. continue;
  863. return(0);
  864. #ifdef REGEXP_H_WCHARS
  865. case CLET|CMB:
  866. c = regexp_h_show(lp);
  867. if (!regexp_h_wuletter(c) && !iswdigit(c))
  868. continue;
  869. return(0);
  870. #endif /* REGEXP_H_WCHARS */
  871. case CCHR|RNGE:
  872. c = *ep++;
  873. regexp_h_getrnge(ep, least);
  874. while(low--)
  875. if(*lp++ != c)
  876. return(0);
  877. curlp = lp;
  878. while(size--) {
  879. regexp_h_push(&sb, &sp, &sc, lp);
  880. if(*lp++ != c)
  881. break;
  882. }
  883. if(size < 0) {
  884. regexp_h_push(&sb, &sp, &sc, lp);
  885. lp++;
  886. }
  887. ep += 2;
  888. goto star;
  889. #ifdef REGEXP_H_WCHARS
  890. case CCHR|RNGE|CMB:
  891. case CCH1|RNGE:
  892. case CCH2|RNGE:
  893. case CCH3|RNGE:
  894. c = regexp_h_fetch(ep, 0);
  895. regexp_h_getrnge(ep, least);
  896. while (low--)
  897. if (regexp_h_fetch(lp, 1) != c)
  898. return 0;
  899. curlp = lp;
  900. while (size--) {
  901. regexp_h_push(&sb, &sp, &sc, lp);
  902. if (regexp_h_fetch(lp, 1) != c)
  903. break;
  904. }
  905. if(size < 0) {
  906. regexp_h_push(&sb, &sp, &sc, lp);
  907. regexp_h_fetch(lp, 1);
  908. }
  909. ep += 2;
  910. goto star;
  911. #endif /* REGEXP_H_WCHARS */
  912. case CDOT|RNGE:
  913. regexp_h_getrnge(ep, least);
  914. while(low--)
  915. if(*lp++ == '\0')
  916. return(0);
  917. curlp = lp;
  918. while(size--) {
  919. regexp_h_push(&sb, &sp, &sc, lp);
  920. if(*lp++ == '\0')
  921. break;
  922. }
  923. if(size < 0) {
  924. regexp_h_push(&sb, &sp, &sc, lp);
  925. lp++;
  926. }
  927. ep += 2;
  928. goto star;
  929. #ifdef REGEXP_H_WCHARS
  930. case CDOT|RNGE|CMB:
  931. regexp_h_getrnge(ep, least);
  932. while (low--)
  933. if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
  934. return 0;
  935. curlp = lp;
  936. while (size--) {
  937. regexp_h_push(&sb, &sp, &sc, lp);
  938. if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
  939. break;
  940. }
  941. if (size < 0) {
  942. regexp_h_push(&sb, &sp, &sc, lp);
  943. regexp_h_fetch(lp, 1);
  944. }
  945. ep += 2;
  946. goto star;
  947. #endif /* REGEXP_H_WCHARS */
  948. case CCL|RNGE:
  949. regexp_h_getrnge(ep + 32, least);
  950. while(low--) {
  951. c = *lp++ & 0377;
  952. if(!ISTHERE(c))
  953. return(0);
  954. }
  955. curlp = lp;
  956. while(size--) {
  957. regexp_h_push(&sb, &sp, &sc, lp);
  958. c = *lp++ & 0377;
  959. if(!ISTHERE(c))
  960. break;
  961. }
  962. if(size < 0) {
  963. regexp_h_push(&sb, &sp, &sc, lp);
  964. lp++;
  965. }
  966. ep += 34; /* 32 + 2 */
  967. goto star;
  968. #ifdef REGEXP_H_WCHARS
  969. case CCL|RNGE|CMB:
  970. case CNCL|RNGE|CMB:
  971. regexp_h_getrnge(ep + (*ep & 0377) + 17, least);
  972. while (low--) {
  973. c = regexp_h_fetch(lp, 1);
  974. if (!regexp_h_cclass(ep, c,
  975. (ep[-1] & 0377 & ~REGEXP_H_LEAST)
  976. == (CCL|RNGE|CMB)))
  977. return 0;
  978. }
  979. curlp = lp;
  980. while (size--) {
  981. regexp_h_push(&sb, &sp, &sc, lp);
  982. c = regexp_h_fetch(lp, 1);
  983. if (!regexp_h_cclass(ep, c,
  984. (ep[-1] & 0377 & ~REGEXP_H_LEAST)
  985. == (CCL|RNGE|CMB)))
  986. break;
  987. }
  988. if (size < 0) {
  989. regexp_h_push(&sb, &sp, &sc, lp);
  990. regexp_h_fetch(lp, 1);
  991. }
  992. ep += (*ep & 0377) + 19;
  993. goto star;
  994. #endif /* REGEXP_H_WCHARS */
  995. case CBACK:
  996. bbeg = braslist[*ep & 0377];
  997. ct = braelist[*ep++ & 0377] - bbeg;
  998. if(strncmp(bbeg, lp, ct) == 0) {
  999. lp += ct;
  1000. continue;
  1001. }
  1002. return(0);
  1003. case CBACK|STAR:
  1004. bbeg = braslist[*ep & 0377];
  1005. ct = braelist[*ep++ & 0377] - bbeg;
  1006. curlp = lp;
  1007. while(strncmp(bbeg, lp, ct) == 0)
  1008. lp += ct;
  1009. while(lp >= curlp) {
  1010. if(regexp_h_advance(lp, ep)) return(1);
  1011. lp -= ct;
  1012. }
  1013. return(0);
  1014. case CDOT|STAR:
  1015. curlp = lp;
  1016. do
  1017. regexp_h_push(&sb, &sp, &sc, lp);
  1018. while (*lp++);
  1019. goto star;
  1020. #ifdef REGEXP_H_WCHARS
  1021. case CDOT|STAR|CMB:
  1022. curlp = lp;
  1023. do
  1024. regexp_h_push(&sb, &sp, &sc, lp);
  1025. while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF);
  1026. goto star;
  1027. #endif /* REGEXP_H_WCHARS */
  1028. case CCHR|STAR:
  1029. curlp = lp;
  1030. do
  1031. regexp_h_push(&sb, &sp, &sc, lp);
  1032. while (*lp++ == *ep);
  1033. ep++;
  1034. goto star;
  1035. #ifdef REGEXP_H_WCHARS
  1036. case CCHR|STAR|CMB:
  1037. case CCH1|STAR:
  1038. case CCH2|STAR:
  1039. case CCH3|STAR:
  1040. curlp = lp;
  1041. d = regexp_h_fetch(ep, 0);
  1042. do
  1043. regexp_h_push(&sb, &sp, &sc, lp);
  1044. while (regexp_h_fetch(lp, 1) == d);
  1045. goto star;
  1046. #endif /* REGEXP_H_WCHARS */
  1047. case CCL|STAR:
  1048. curlp = lp;
  1049. do {
  1050. regexp_h_push(&sb, &sp, &sc, lp);
  1051. c = *lp++ & 0377;
  1052. } while(ISTHERE(c));
  1053. ep += 32;
  1054. goto star;
  1055. #ifdef REGEXP_H_WCHARS
  1056. case CCL|STAR|CMB:
  1057. case CNCL|STAR|CMB:
  1058. curlp = lp;
  1059. do {
  1060. regexp_h_push(&sb, &sp, &sc, lp);
  1061. c = regexp_h_fetch(lp, 1);
  1062. } while (regexp_h_cclass(ep, c, (ep[-1] & 0377)
  1063. == (CCL|STAR|CMB)));
  1064. ep += (*ep & 0377) + 17;
  1065. goto star;
  1066. #endif /* REGEXP_H_WCHARS */
  1067. star:
  1068. #ifdef REGEXP_H_WCHARS
  1069. if (regexp_h_wchars == 0) {
  1070. #endif
  1071. do {
  1072. if(--lp == locs)
  1073. break;
  1074. if (regexp_h_advance(lp, ep))
  1075. return(1);
  1076. } while (lp > curlp);
  1077. #ifdef REGEXP_H_WCHARS
  1078. } else {
  1079. do {
  1080. lp = regexp_h_pop(&sb, &sp, &sc, lp);
  1081. if (lp <= locs)
  1082. break;
  1083. if (regexp_h_advance(lp, ep)) {
  1084. regexp_h_zerostak(&sb, &sp);
  1085. return(1);
  1086. }
  1087. } while (lp > curlp);
  1088. regexp_h_zerostak(&sb, &sp);
  1089. }
  1090. #endif /* REGEXP_H_WCHARS */
  1091. return(0);
  1092. }
  1093. }
  1094. static void
  1095. regexp_h_getrnge(register const char *str, int least)
  1096. {
  1097. low = *str++ & 0377;
  1098. size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
  1099. }
  1100. int
  1101. advance(const char *lp, const char *ep)
  1102. {
  1103. REGEXP_H_ADVANCE_INIT /* skip past circf */
  1104. regexp_h_bol = lp;
  1105. #ifdef REGEXP_H_WCHARS
  1106. regexp_h_firstwc = NULL;
  1107. #endif /* REGEXP_H_WCHARS */
  1108. return regexp_h_advance(lp, ep);
  1109. }