regexp.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. /*
  2. * regexp.h -- old-style regexp compile and step (emulated with POSIX regex)
  3. * Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Library Public License as published by
  7. * the Free Software Foundation; either version 2, or (at your option)
  8. * any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU Library Public License for more details.
  14. */
  15. /*
  16. * Think really hard before you intentionally include this file.
  17. * You should really be using the POSIX regex interface instead.
  18. * This emulation file is intended solely for compiling old code.
  19. *
  20. * A program that uses this file must define six macros: INIT,
  21. * GETC, PEEKC, UNGETC, RETURN, and ERROR. This interface is
  22. * so arcane that VMS hackers point at it in ridicule.
  23. */
  24. #ifndef _REGEXP_H
  25. #define _REGEXP_H
  26. #include <sys/types.h> /* regex.h needs size_t */
  27. #include <regex.h> /* POSIX.2 regexp routines */
  28. #include <stdlib.h> /* for malloc, realloc and free */
  29. /*
  30. * These three advertised external variables record state information
  31. * for compile and step. They are so gross, I'm choking as I write this.
  32. */
  33. char *loc1; /* the beginning of a match */
  34. char *loc2; /* the end of a match */
  35. int circf; /* current pattern begins with '^' */
  36. /*
  37. * These are the other variables mentioned in the regexp.h manpage.
  38. * Since we don't emulate them (whatever they do), we want errors if
  39. * they are referenced. Therefore they are commented out here.
  40. */
#if 0
/*
 * Deliberately disabled: these are not emulated, so leaving them
 * undeclared makes any code that references them fail to build.
 */
char *locs;
int sed;
int nbra;
#endif
  46. /*
  47. * We need to stuff a regex_t into an arbitrary buffer so align it.
  48. * GCC makes this easy. For the others we have to guess.
  49. */
  50. #ifdef __GNUC__
  51. #define __REGEX_T_ALIGN (__alignof__(regex_t))
  52. #else /* !__GNUC__ */
  53. #define __REGEX_T_ALIGN 8
  54. #endif /* !__GNUC__ */
  55. #define __regex_t_align(p) \
  56. ((regex_t *) ((((unsigned long) p) + __REGEX_T_ALIGN - 1) \
  57. / __REGEX_T_ALIGN * __REGEX_T_ALIGN))
  58. /*
  59. * We just slurp the whole pattern into a string and then compile
  60. * it `normally'. With this implementation we never use the PEEKC
  61. * macro. Please feel free to die laughing when we translate
  62. * error symbols into hard-coded numbers.
  63. */
  64. char *
  65. compile(char *instring, char *expbuf, char *endbuf, int eof)
  66. {
  67. int __c;
  68. int __len;
  69. char *__buf;
  70. int __buflen;
  71. int __error;
  72. regex_t *__preg;
  73. INIT;
  74. __buflen = 128;
  75. __buf = malloc(__buflen);
  76. if (!__buf) {
  77. ERROR(50);
  78. return 0;
  79. }
  80. __len = 0;
  81. circf = 0;
  82. for (;;) {
  83. __c = GETC();
  84. if (__c == eof)
  85. break;
  86. if (__c == '\0' || __c == '\n') {
  87. UNGETC(__c);
  88. break;
  89. }
  90. if (__len + 2 > __buflen) {
  91. __buflen *= 2;
  92. __buf = realloc(__buf, __buflen);
  93. if (!__buf) {
  94. ERROR(50);
  95. return 0;
  96. }
  97. }
  98. if (__len == 0 && !circf && __c == '^')
  99. circf = 1;
  100. else
  101. __buf[__len++] = __c;
  102. }
  103. if (__len == 0 && !circf) {
  104. free(__buf);
  105. ERROR(41);
  106. return 0;
  107. }
  108. __buf[__len] = '\0';
  109. if (endbuf <= expbuf + sizeof(regex_t)) {
  110. free(__buf);
  111. ERROR(50);
  112. return 0;
  113. }
  114. __preg = __regex_t_align(expbuf);
  115. __preg->buffer = (char *) (__preg + 1);
  116. __preg->allocated = endbuf - (char *) __preg->buffer;
  117. __error = regcomp(__preg, __buf, REG_NEWLINE);
  118. free(__buf);
  119. switch (__error) {
  120. case 0:
  121. break;
  122. case REG_BADRPT:
  123. __error = 36; /* poor fit */
  124. break;
  125. case REG_BADBR:
  126. __error = 16;
  127. break;
  128. case REG_EBRACE:
  129. __error = 44; /* poor fit */
  130. break;
  131. case REG_EBRACK:
  132. __error = 49;
  133. break;
  134. case REG_ERANGE:
  135. __error = 36; /* poor fit */
  136. break;
  137. case REG_ECTYPE:
  138. __error = 36; /* poor fit */
  139. break;
  140. case REG_EPAREN:
  141. __error = 42;
  142. break;
  143. case REG_ESUBREG:
  144. __error = 36; /* poor fit */
  145. break;
  146. case REG_EEND:
  147. __error = 36; /* poor fit */
  148. break;
  149. case REG_EESCAPE:
  150. __error = 36;
  151. break;
  152. case REG_BADPAT:
  153. __error = 36; /* poor fit */
  154. break;
  155. case REG_ESIZE:
  156. __error = 50;
  157. break;
  158. case REG_ESPACE:
  159. __error = 50;
  160. break;
  161. default:
  162. __error = 36; /* as good as any */
  163. break;
  164. }
  165. if (__error) {
  166. ERROR(__error);
  167. return 0;
  168. }
  169. #ifdef _RX_H
  170. RETURN((__preg->buffer + __preg->rx.allocated - __preg->rx.reserved));
  171. #else
  172. RETURN((__preg->buffer + __preg->used));
  173. #endif
  174. }
  175. /*
  176. * Note how we carefully emulate the gross `circf' hack. Otherwise,
  177. * this just looks like an ordinary matching call that records the
  178. * starting and ending match positions.
  179. */
  180. int
  181. step(char *string, char *expbuf)
  182. {
  183. int __result;
  184. regmatch_t __pmatch[1];
  185. __result = regexec(__regex_t_align(expbuf), string, 1, __pmatch, 0);
  186. if (circf && __pmatch[0].rm_so != 0)
  187. __result = REG_NOMATCH;
  188. if (__result == 0) {
  189. loc1 = string + __pmatch[0].rm_so;
  190. loc2 = string + __pmatch[0].rm_eo;
  191. }
  192. return __result == 0;
  193. }
  194. /*
  195. * For advance we are only supposed to match at the beginning of the
  196. * string. You have to read the man page really carefully to find this
  197. * one. We'll match them kludge-for-kludge.
  198. */
  199. int
  200. advance(char *string, char *expbuf)
  201. {
  202. int __old_circf;
  203. int __result;
  204. __old_circf = circf;
  205. circf = 1;
  206. __result = step(string, expbuf);
  207. circf = __old_circf;
  208. return __result;
  209. }
  210. #endif /* _REGEXP_H */