You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1608 lines
36 KiB

27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
  1. #include <sys/types.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <ctype.h>
  5. #include <limits.h>
  6. #include <stdlib.h>
  7. #define POSIX_MISTAKE
  8. #include "utils.h"
  9. #include "regex.h"
  10. #include "regex2.h"
  11. #include "cclass.h"
  12. #include "cname.h"
  13. /*
  14. * parse structure, passed up and down to avoid global variables and
  15. * other clumsinesses
  16. */
  17. struct parse {
  18. char *next; /* next character in RE */
  19. char *end; /* end of string (-> NUL normally) */
  20. int error; /* has an error been seen? */
  21. sop *strip; /* malloced strip */
  22. sopno ssize; /* malloced strip size (allocated) */
  23. sopno slen; /* malloced strip length (used) */
  24. int ncsalloc; /* number of csets allocated */
  25. struct re_guts *g;
  26. # define NPAREN 10 /* we need to remember () 1-9 for back refs */
  27. sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
  28. sopno pend[NPAREN]; /* -> ) ([0] unused) */
  29. };
  30. #include "regcomp.ih"
  31. static char nuls[10]; /* place to point scanner in event of error */
  32. /*
  33. * macros for use with parse structure
  34. * BEWARE: these know that the parse structure is named `p' !!!
  35. */
  36. #define PEEK() (*p->next)
  37. #define PEEK2() (*(p->next+1))
  38. #define MORE() (p->next < p->end)
  39. #define MORE2() (p->next+1 < p->end)
  40. #define SEE(c) (MORE() && PEEK() == (c))
  41. #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
  42. #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
  43. #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
  44. #define NEXT() (p->next++)
  45. #define NEXT2() (p->next += 2)
  46. #define NEXTn(n) (p->next += (n))
  47. #define GETNEXT() (*p->next++)
  48. #define SETERROR(e) seterr(p, (e))
  49. #define REQUIRE(co, e) (void) ((co) || SETERROR(e))
  50. #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
  51. #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
  52. #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
  53. #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
  54. #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
  55. #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
  56. #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
  57. #define HERE() (p->slen)
  58. #define THERE() (p->slen - 1)
  59. #define THERETHERE() (p->slen - 2)
  60. #define DROP(n) (p->slen -= (n))
  61. #ifndef NDEBUG
  62. static int never = 0; /* for use in asserts; shuts lint up */
  63. #else
  64. #define never 0 /* some <assert.h>s have bugs too */
  65. #endif
  66. /*
  67. - regcomp - interface for parser and compilation
  68. = API_EXPORT(int) regcomp(regex_t *, const char *, int);
  69. = #define REG_BASIC 0000
  70. = #define REG_EXTENDED 0001
  71. = #define REG_ICASE 0002
  72. = #define REG_NOSUB 0004
  73. = #define REG_NEWLINE 0010
  74. = #define REG_NOSPEC 0020
  75. = #define REG_PEND 0040
  76. = #define REG_DUMP 0200
  77. */
  78. API_EXPORT(int) /* 0 success, otherwise REG_something */
  79. regcomp(preg, pattern, cflags)
  80. regex_t *preg;
  81. const char *pattern;
  82. int cflags;
  83. {
  84. struct parse pa;
  85. register struct re_guts *g;
  86. register struct parse *p = &pa;
  87. register int i;
  88. register size_t len;
  89. #ifdef REDEBUG
  90. # define GOODFLAGS(f) (f)
  91. #else
  92. # define GOODFLAGS(f) ((f)&~REG_DUMP)
  93. #endif
  94. cflags = GOODFLAGS(cflags);
  95. if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
  96. return(REG_INVARG);
  97. if (cflags&REG_PEND) {
  98. if (preg->re_endp < pattern)
  99. return(REG_INVARG);
  100. len = preg->re_endp - pattern;
  101. } else
  102. len = strlen((char *)pattern);
  103. /* do the mallocs early so failure handling is easy */
  104. g = (struct re_guts *)malloc(sizeof(struct re_guts) +
  105. (NC-1)*sizeof(cat_t));
  106. if (g == NULL)
  107. return(REG_ESPACE);
  108. p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
  109. p->strip = (sop *)malloc(p->ssize * sizeof(sop));
  110. p->slen = 0;
  111. if (p->strip == NULL) {
  112. free((char *)g);
  113. return(REG_ESPACE);
  114. }
  115. /* set things up */
  116. p->g = g;
  117. p->next = (char *)pattern; /* convenience; we do not modify it */
  118. p->end = p->next + len;
  119. p->error = 0;
  120. p->ncsalloc = 0;
  121. for (i = 0; i < NPAREN; i++) {
  122. p->pbegin[i] = 0;
  123. p->pend[i] = 0;
  124. }
  125. g->csetsize = NC;
  126. g->sets = NULL;
  127. g->setbits = NULL;
  128. g->ncsets = 0;
  129. g->cflags = cflags;
  130. g->iflags = 0;
  131. g->nbol = 0;
  132. g->neol = 0;
  133. g->must = NULL;
  134. g->mlen = 0;
  135. g->nsub = 0;
  136. g->ncategories = 1; /* category 0 is "everything else" */
  137. g->categories = &g->catspace[-(CHAR_MIN)];
  138. (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
  139. g->backrefs = 0;
  140. /* do it */
  141. EMIT(OEND, 0);
  142. g->firststate = THERE();
  143. if (cflags&REG_EXTENDED)
  144. p_ere(p, OUT);
  145. else if (cflags&REG_NOSPEC)
  146. p_str(p);
  147. else
  148. p_bre(p, OUT, OUT);
  149. EMIT(OEND, 0);
  150. g->laststate = THERE();
  151. /* tidy up loose ends and fill things in */
  152. categorize(p, g);
  153. stripsnug(p, g);
  154. findmust(p, g);
  155. g->nplus = pluscount(p, g);
  156. g->magic = MAGIC2;
  157. preg->re_nsub = g->nsub;
  158. preg->re_g = g;
  159. preg->re_magic = MAGIC1;
  160. #ifndef REDEBUG
  161. /* not debugging, so can't rely on the assert() in regexec() */
  162. if (g->iflags&BAD)
  163. SETERROR(REG_ASSERT);
  164. #endif
  165. /* win or lose, we're done */
  166. if (p->error != 0) /* lose */
  167. regfree(preg);
  168. return(p->error);
  169. }
  170. /*
  171. - p_ere - ERE parser top level, concatenation and alternation
  172. == static void p_ere(register struct parse *p, int stop);
  173. */
  174. static void
  175. p_ere(p, stop)
  176. register struct parse *p;
  177. int stop; /* character this ERE should end at */
  178. {
  179. register char c;
  180. register sopno prevback = 0;
  181. register sopno prevfwd = 0;
  182. register sopno conc;
  183. register int first = 1; /* is this the first alternative? */
  184. for (;;) {
  185. /* do a bunch of concatenated expressions */
  186. conc = HERE();
  187. while (MORE() && (c = PEEK()) != '|' && c != stop)
  188. p_ere_exp(p);
  189. (void) REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
  190. if (!EAT('|'))
  191. break; /* NOTE BREAK OUT */
  192. if (first) {
  193. INSERT(OCH_, conc); /* offset is wrong */
  194. prevfwd = conc;
  195. prevback = conc;
  196. first = 0;
  197. }
  198. ASTERN(OOR1, prevback);
  199. prevback = THERE();
  200. AHEAD(prevfwd); /* fix previous offset */
  201. prevfwd = HERE();
  202. EMIT(OOR2, 0); /* offset is very wrong */
  203. }
  204. if (!first) { /* tail-end fixups */
  205. AHEAD(prevfwd);
  206. ASTERN(O_CH, prevback);
  207. }
  208. assert(!MORE() || SEE(stop));
  209. }
  210. /*
  211. - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
  212. == static void p_ere_exp(register struct parse *p);
  213. */
  214. static void
  215. p_ere_exp(p)
  216. register struct parse *p;
  217. {
  218. register char c;
  219. register sopno pos;
  220. register int count;
  221. register int count2;
  222. register sopno subno;
  223. int wascaret = 0;
  224. assert(MORE()); /* caller should have ensured this */
  225. c = GETNEXT();
  226. pos = HERE();
  227. switch (c) {
  228. case '(':
  229. REQUIRE(MORE(), REG_EPAREN);
  230. p->g->nsub++;
  231. subno = p->g->nsub;
  232. if (subno < NPAREN)
  233. p->pbegin[subno] = HERE();
  234. EMIT(OLPAREN, subno);
  235. if (!SEE(')'))
  236. p_ere(p, ')');
  237. if (subno < NPAREN) {
  238. p->pend[subno] = HERE();
  239. assert(p->pend[subno] != 0);
  240. }
  241. EMIT(ORPAREN, subno);
  242. MUSTEAT(')', REG_EPAREN);
  243. break;
  244. #ifndef POSIX_MISTAKE
  245. case ')': /* happens only if no current unmatched ( */
  246. /*
  247. * You may ask, why the ifndef? Because I didn't notice
  248. * this until slightly too late for 1003.2, and none of the
  249. * other 1003.2 regular-expression reviewers noticed it at
  250. * all. So an unmatched ) is legal POSIX, at least until
  251. * we can get it fixed.
  252. */
  253. SETERROR(REG_EPAREN);
  254. break;
  255. #endif
  256. case '^':
  257. EMIT(OBOL, 0);
  258. p->g->iflags |= USEBOL;
  259. p->g->nbol++;
  260. wascaret = 1;
  261. break;
  262. case '$':
  263. EMIT(OEOL, 0);
  264. p->g->iflags |= USEEOL;
  265. p->g->neol++;
  266. break;
  267. case '|':
  268. SETERROR(REG_EMPTY);
  269. break;
  270. case '*':
  271. case '+':
  272. case '?':
  273. SETERROR(REG_BADRPT);
  274. break;
  275. case '.':
  276. if (p->g->cflags&REG_NEWLINE)
  277. nonnewline(p);
  278. else
  279. EMIT(OANY, 0);
  280. break;
  281. case '[':
  282. p_bracket(p);
  283. break;
  284. case '\\':
  285. REQUIRE(MORE(), REG_EESCAPE);
  286. c = GETNEXT();
  287. ordinary(p, c);
  288. break;
  289. case '{': /* okay as ordinary except if digit follows */
  290. REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
  291. /* FALLTHROUGH */
  292. default:
  293. ordinary(p, c);
  294. break;
  295. }
  296. if (!MORE())
  297. return;
  298. c = PEEK();
  299. /* we call { a repetition if followed by a digit */
  300. if (!( c == '*' || c == '+' || c == '?' ||
  301. (c == '{' && MORE2() && isdigit(PEEK2())) ))
  302. return; /* no repetition, we're done */
  303. NEXT();
  304. REQUIRE(!wascaret, REG_BADRPT);
  305. switch (c) {
  306. case '*': /* implemented as +? */
  307. /* this case does not require the (y|) trick, noKLUDGE */
  308. INSERT(OPLUS_, pos);
  309. ASTERN(O_PLUS, pos);
  310. INSERT(OQUEST_, pos);
  311. ASTERN(O_QUEST, pos);
  312. break;
  313. case '+':
  314. INSERT(OPLUS_, pos);
  315. ASTERN(O_PLUS, pos);
  316. break;
  317. case '?':
  318. /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
  319. INSERT(OCH_, pos); /* offset slightly wrong */
  320. ASTERN(OOR1, pos); /* this one's right */
  321. AHEAD(pos); /* fix the OCH_ */
  322. EMIT(OOR2, 0); /* offset very wrong... */
  323. AHEAD(THERE()); /* ...so fix it */
  324. ASTERN(O_CH, THERETHERE());
  325. break;
  326. case '{':
  327. count = p_count(p);
  328. if (EAT(',')) {
  329. if (isdigit(PEEK())) {
  330. count2 = p_count(p);
  331. REQUIRE(count <= count2, REG_BADBR);
  332. } else /* single number with comma */
  333. count2 = INFINITY;
  334. } else /* just a single number */
  335. count2 = count;
  336. repeat(p, pos, count, count2);
  337. if (!EAT('}')) { /* error heuristics */
  338. while (MORE() && PEEK() != '}')
  339. NEXT();
  340. REQUIRE(MORE(), REG_EBRACE);
  341. SETERROR(REG_BADBR);
  342. }
  343. break;
  344. }
  345. if (!MORE())
  346. return;
  347. c = PEEK();
  348. if (!( c == '*' || c == '+' || c == '?' ||
  349. (c == '{' && MORE2() && isdigit(PEEK2())) ) )
  350. return;
  351. SETERROR(REG_BADRPT);
  352. }
  353. /*
  354. - p_str - string (no metacharacters) "parser"
  355. == static void p_str(register struct parse *p);
  356. */
  357. static void
  358. p_str(p)
  359. register struct parse *p;
  360. {
  361. REQUIRE(MORE(), REG_EMPTY);
  362. while (MORE())
  363. ordinary(p, GETNEXT());
  364. }
  365. /*
  366. - p_bre - BRE parser top level, anchoring and concatenation
  367. == static void p_bre(register struct parse *p, register int end1, \
  368. == register int end2);
  369. * Giving end1 as OUT essentially eliminates the end1/end2 check.
  370. *
  371. * This implementation is a bit of a kludge, in that a trailing $ is first
  372. * taken as an ordinary character and then revised to be an anchor. The
  373. * only undesirable side effect is that '$' gets included as a character
  374. * category in such cases. This is fairly harmless; not worth fixing.
  375. * The amount of lookahead needed to avoid this kludge is excessive.
  376. */
  377. static void
  378. p_bre(p, end1, end2)
  379. register struct parse *p;
  380. register int end1; /* first terminating character */
  381. register int end2; /* second terminating character */
  382. {
  383. register sopno start = HERE();
  384. register int first = 1; /* first subexpression? */
  385. register int wasdollar = 0;
  386. if (EAT('^')) {
  387. EMIT(OBOL, 0);
  388. p->g->iflags |= USEBOL;
  389. p->g->nbol++;
  390. }
  391. while (MORE() && !SEETWO(end1, end2)) {
  392. wasdollar = p_simp_re(p, first);
  393. first = 0;
  394. }
  395. if (wasdollar) { /* oops, that was a trailing anchor */
  396. DROP(1);
  397. EMIT(OEOL, 0);
  398. p->g->iflags |= USEEOL;
  399. p->g->neol++;
  400. }
  401. REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
  402. }
  403. /*
  404. - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
  405. == static int p_simp_re(register struct parse *p, int starordinary);
  406. */
  407. static int /* was the simple RE an unbackslashed $? */
  408. p_simp_re(p, starordinary)
  409. register struct parse *p;
  410. int starordinary; /* is a leading * an ordinary character? */
  411. {
  412. register int c;
  413. register int count;
  414. register int count2;
  415. register sopno pos;
  416. register int i;
  417. register sopno subno;
  418. # define BACKSL (1<<CHAR_BIT)
  419. pos = HERE(); /* repetion op, if any, covers from here */
  420. assert(MORE()); /* caller should have ensured this */
  421. c = GETNEXT();
  422. if (c == '\\') {
  423. REQUIRE(MORE(), REG_EESCAPE);
  424. c = BACKSL | (unsigned char)GETNEXT();
  425. }
  426. switch (c) {
  427. case '.':
  428. if (p->g->cflags&REG_NEWLINE)
  429. nonnewline(p);
  430. else
  431. EMIT(OANY, 0);
  432. break;
  433. case '[':
  434. p_bracket(p);
  435. break;
  436. case BACKSL|'{':
  437. SETERROR(REG_BADRPT);
  438. break;
  439. case BACKSL|'(':
  440. p->g->nsub++;
  441. subno = p->g->nsub;
  442. if (subno < NPAREN)
  443. p->pbegin[subno] = HERE();
  444. EMIT(OLPAREN, subno);
  445. /* the MORE here is an error heuristic */
  446. if (MORE() && !SEETWO('\\', ')'))
  447. p_bre(p, '\\', ')');
  448. if (subno < NPAREN) {
  449. p->pend[subno] = HERE();
  450. assert(p->pend[subno] != 0);
  451. }
  452. EMIT(ORPAREN, subno);
  453. REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
  454. break;
  455. case BACKSL|')': /* should not get here -- must be user */
  456. case BACKSL|'}':
  457. SETERROR(REG_EPAREN);
  458. break;
  459. case BACKSL|'1':
  460. case BACKSL|'2':
  461. case BACKSL|'3':
  462. case BACKSL|'4':
  463. case BACKSL|'5':
  464. case BACKSL|'6':
  465. case BACKSL|'7':
  466. case BACKSL|'8':
  467. case BACKSL|'9':
  468. i = (c&~BACKSL) - '0';
  469. assert(i < NPAREN);
  470. if (p->pend[i] != 0) {
  471. assert(i <= p->g->nsub);
  472. EMIT(OBACK_, i);
  473. assert(p->pbegin[i] != 0);
  474. assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
  475. assert(OP(p->strip[p->pend[i]]) == ORPAREN);
  476. (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
  477. EMIT(O_BACK, i);
  478. } else
  479. SETERROR(REG_ESUBREG);
  480. p->g->backrefs = 1;
  481. break;
  482. case '*':
  483. REQUIRE(starordinary, REG_BADRPT);
  484. /* FALLTHROUGH */
  485. default:
  486. ordinary(p, (char)c); /* takes off BACKSL, if any */
  487. break;
  488. }
  489. if (EAT('*')) { /* implemented as +? */
  490. /* this case does not require the (y|) trick, noKLUDGE */
  491. INSERT(OPLUS_, pos);
  492. ASTERN(O_PLUS, pos);
  493. INSERT(OQUEST_, pos);
  494. ASTERN(O_QUEST, pos);
  495. } else if (EATTWO('\\', '{')) {
  496. count = p_count(p);
  497. if (EAT(',')) {
  498. if (MORE() && isdigit(PEEK())) {
  499. count2 = p_count(p);
  500. REQUIRE(count <= count2, REG_BADBR);
  501. } else /* single number with comma */
  502. count2 = INFINITY;
  503. } else /* just a single number */
  504. count2 = count;
  505. repeat(p, pos, count, count2);
  506. if (!EATTWO('\\', '}')) { /* error heuristics */
  507. while (MORE() && !SEETWO('\\', '}'))
  508. NEXT();
  509. REQUIRE(MORE(), REG_EBRACE);
  510. SETERROR(REG_BADBR);
  511. }
  512. } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
  513. return(1);
  514. return(0);
  515. }
  516. /*
  517. - p_count - parse a repetition count
  518. == static int p_count(register struct parse *p);
  519. */
  520. static int /* the value */
  521. p_count(p)
  522. register struct parse *p;
  523. {
  524. register int count = 0;
  525. register int ndigits = 0;
  526. while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
  527. count = count*10 + (GETNEXT() - '0');
  528. ndigits++;
  529. }
  530. REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
  531. return(count);
  532. }
  533. /*
  534. - p_bracket - parse a bracketed character list
  535. == static void p_bracket(register struct parse *p);
  536. *
  537. * Note a significant property of this code: if the allocset() did SETERROR,
  538. * no set operations are done.
  539. */
  540. static void
  541. p_bracket(p)
  542. register struct parse *p;
  543. {
  544. register cset *cs = allocset(p);
  545. register int invert = 0;
  546. /* Dept of Truly Sickening Special-Case Kludges */
  547. if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
  548. EMIT(OBOW, 0);
  549. NEXTn(6);
  550. return;
  551. }
  552. if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
  553. EMIT(OEOW, 0);
  554. NEXTn(6);
  555. return;
  556. }
  557. if (EAT('^'))
  558. invert++; /* make note to invert set at end */
  559. if (EAT(']'))
  560. CHadd(cs, ']');
  561. else if (EAT('-'))
  562. CHadd(cs, '-');
  563. while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
  564. p_b_term(p, cs);
  565. if (EAT('-'))
  566. CHadd(cs, '-');
  567. MUSTEAT(']', REG_EBRACK);
  568. if (p->error != 0) /* don't mess things up further */
  569. return;
  570. if (p->g->cflags&REG_ICASE) {
  571. register int i;
  572. register int ci;
  573. for (i = p->g->csetsize - 1; i >= 0; i--)
  574. if (CHIN(cs, i) && isalpha(i)) {
  575. ci = othercase(i);
  576. if (ci != i)
  577. CHadd(cs, ci);
  578. }
  579. if (cs->multis != NULL)
  580. mccase(p, cs);
  581. }
  582. if (invert) {
  583. register int i;
  584. for (i = p->g->csetsize - 1; i >= 0; i--)
  585. if (CHIN(cs, i))
  586. CHsub(cs, i);
  587. else
  588. CHadd(cs, i);
  589. if (p->g->cflags&REG_NEWLINE)
  590. CHsub(cs, '\n');
  591. if (cs->multis != NULL)
  592. mcinvert(p, cs);
  593. }
  594. assert(cs->multis == NULL); /* xxx */
  595. if (nch(p, cs) == 1) { /* optimize singleton sets */
  596. ordinary(p, firstch(p, cs));
  597. freeset(p, cs);
  598. } else
  599. EMIT(OANYOF, freezeset(p, cs));
  600. }
  601. /*
  602. - p_b_term - parse one term of a bracketed character list
  603. == static void p_b_term(register struct parse *p, register cset *cs);
  604. */
  605. static void
  606. p_b_term(p, cs)
  607. register struct parse *p;
  608. register cset *cs;
  609. {
  610. register char c;
  611. register char start, finish;
  612. register int i;
  613. /* classify what we've got */
  614. switch ((MORE()) ? PEEK() : '\0') {
  615. case '[':
  616. c = (MORE2()) ? PEEK2() : '\0';
  617. break;
  618. case '-':
  619. SETERROR(REG_ERANGE);
  620. return; /* NOTE RETURN */
  621. break;
  622. default:
  623. c = '\0';
  624. break;
  625. }
  626. switch (c) {
  627. case ':': /* character class */
  628. NEXT2();
  629. REQUIRE(MORE(), REG_EBRACK);
  630. c = PEEK();
  631. REQUIRE(c != '-' && c != ']', REG_ECTYPE);
  632. p_b_cclass(p, cs);
  633. REQUIRE(MORE(), REG_EBRACK);
  634. REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
  635. break;
  636. case '=': /* equivalence class */
  637. NEXT2();
  638. REQUIRE(MORE(), REG_EBRACK);
  639. c = PEEK();
  640. REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
  641. p_b_eclass(p, cs);
  642. REQUIRE(MORE(), REG_EBRACK);
  643. REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
  644. break;
  645. default: /* symbol, ordinary character, or range */
  646. /* xxx revision needed for multichar stuff */
  647. start = p_b_symbol(p);
  648. if (SEE('-') && MORE2() && PEEK2() != ']') {
  649. /* range */
  650. NEXT();
  651. if (EAT('-'))
  652. finish = '-';
  653. else
  654. finish = p_b_symbol(p);
  655. } else
  656. finish = start;
  657. /* xxx what about signed chars here... */
  658. REQUIRE(start <= finish, REG_ERANGE);
  659. for (i = start; i <= finish; i++)
  660. CHadd(cs, i);
  661. break;
  662. }
  663. }
  664. /*
  665. - p_b_cclass - parse a character-class name and deal with it
  666. == static void p_b_cclass(register struct parse *p, register cset *cs);
  667. */
  668. static void
  669. p_b_cclass(p, cs)
  670. register struct parse *p;
  671. register cset *cs;
  672. {
  673. register char *sp = p->next;
  674. register struct cclass *cp;
  675. register size_t len;
  676. register char *u;
  677. register char c;
  678. while (MORE() && isalpha(PEEK()))
  679. NEXT();
  680. len = p->next - sp;
  681. for (cp = cclasses; cp->name != NULL; cp++)
  682. if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
  683. break;
  684. if (cp->name == NULL) {
  685. /* oops, didn't find it */
  686. SETERROR(REG_ECTYPE);
  687. return;
  688. }
  689. u = cp->chars;
  690. while ((c = *u++) != '\0')
  691. CHadd(cs, c);
  692. for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
  693. MCadd(p, cs, u);
  694. }
  695. /*
  696. - p_b_eclass - parse an equivalence-class name and deal with it
  697. == static void p_b_eclass(register struct parse *p, register cset *cs);
  698. *
  699. * This implementation is incomplete. xxx
  700. */
  701. static void
  702. p_b_eclass(p, cs)
  703. register struct parse *p;
  704. register cset *cs;
  705. {
  706. register char c;
  707. c = p_b_coll_elem(p, '=');
  708. CHadd(cs, c);
  709. }
  710. /*
  711. - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
  712. == static char p_b_symbol(register struct parse *p);
  713. */
  714. static char /* value of symbol */
  715. p_b_symbol(p)
  716. register struct parse *p;
  717. {
  718. register char value;
  719. REQUIRE(MORE(), REG_EBRACK);
  720. if (!EATTWO('[', '.'))
  721. return(GETNEXT());
  722. /* collating symbol */
  723. value = p_b_coll_elem(p, '.');
  724. REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
  725. return(value);
  726. }
  727. /*
  728. - p_b_coll_elem - parse a collating-element name and look it up
  729. == static char p_b_coll_elem(register struct parse *p, int endc);
  730. */
  731. static char /* value of collating element */
  732. p_b_coll_elem(p, endc)
  733. register struct parse *p;
  734. int endc; /* name ended by endc,']' */
  735. {
  736. register char *sp = p->next;
  737. register struct cname *cp;
  738. register int len;
  739. while (MORE() && !SEETWO(endc, ']'))
  740. NEXT();
  741. if (!MORE()) {
  742. SETERROR(REG_EBRACK);
  743. return(0);
  744. }
  745. len = p->next - sp;
  746. for (cp = cnames; cp->name != NULL; cp++)
  747. if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
  748. return(cp->code); /* known name */
  749. if (len == 1)
  750. return(*sp); /* single character */
  751. SETERROR(REG_ECOLLATE); /* neither */
  752. return(0);
  753. }
  754. /*
  755. - othercase - return the case counterpart of an alphabetic
  756. == static char othercase(int ch);
  757. */
  758. static char /* if no counterpart, return ch */
  759. othercase(ch)
  760. int ch;
  761. {
  762. assert(isalpha(ch));
  763. if (isupper(ch))
  764. return(tolower(ch));
  765. else if (islower(ch))
  766. return(toupper(ch));
  767. else /* peculiar, but could happen */
  768. return(ch);
  769. }
  770. /*
  771. - bothcases - emit a dualcase version of a two-case character
  772. == static void bothcases(register struct parse *p, int ch);
  773. *
  774. * Boy, is this implementation ever a kludge...
  775. */
  776. static void
  777. bothcases(p, ch)
  778. register struct parse *p;
  779. int ch;
  780. {
  781. register char *oldnext = p->next;
  782. register char *oldend = p->end;
  783. char bracket[3];
  784. assert(othercase(ch) != ch); /* p_bracket() would recurse */
  785. p->next = bracket;
  786. p->end = bracket+2;
  787. bracket[0] = ch;
  788. bracket[1] = ']';
  789. bracket[2] = '\0';
  790. p_bracket(p);
  791. assert(p->next == bracket+2);
  792. p->next = oldnext;
  793. p->end = oldend;
  794. }
  795. /*
  796. - ordinary - emit an ordinary character
  797. == static void ordinary(register struct parse *p, register int ch);
  798. */
  799. static void
  800. ordinary(p, ch)
  801. register struct parse *p;
  802. register int ch;
  803. {
  804. register cat_t *cap = p->g->categories;
  805. if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
  806. bothcases(p, ch);
  807. else {
  808. EMIT(OCHAR, (unsigned char)ch);
  809. if (cap[ch] == 0)
  810. cap[ch] = p->g->ncategories++;
  811. }
  812. }
  813. /*
  814. - nonnewline - emit REG_NEWLINE version of OANY
  815. == static void nonnewline(register struct parse *p);
  816. *
  817. * Boy, is this implementation ever a kludge...
  818. */
  819. static void
  820. nonnewline(p)
  821. register struct parse *p;
  822. {
  823. register char *oldnext = p->next;
  824. register char *oldend = p->end;
  825. char bracket[4];
  826. p->next = bracket;
  827. p->end = bracket+3;
  828. bracket[0] = '^';
  829. bracket[1] = '\n';
  830. bracket[2] = ']';
  831. bracket[3] = '\0';
  832. p_bracket(p);
  833. assert(p->next == bracket+3);
  834. p->next = oldnext;
  835. p->end = oldend;
  836. }
  837. /*
  838. - repeat - generate code for a bounded repetition, recursively if needed
  839. == static void repeat(register struct parse *p, sopno start, int from, int to);
  840. */
  841. static void
  842. repeat(p, start, from, to)
  843. register struct parse *p;
  844. sopno start; /* operand from here to end of strip */
  845. int from; /* repeated from this number */
  846. int to; /* to this number of times (maybe INFINITY) */
  847. {
  848. register sopno finish = HERE();
  849. # define N 2
  850. # define INF 3
  851. # define REP(f, t) ((f)*8 + (t))
  852. # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
  853. register sopno copy;
  854. if (p->error != 0) /* head off possible runaway recursion */
  855. return;
  856. assert(from <= to);
  857. switch (REP(MAP(from), MAP(to))) {
  858. case REP(0, 0): /* must be user doing this */
  859. DROP(finish-start); /* drop the operand */
  860. break;
  861. case REP(0, 1): /* as x{1,1}? */
  862. case REP(0, N): /* as x{1,n}? */
  863. case REP(0, INF): /* as x{1,}? */
  864. /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
  865. INSERT(OCH_, start); /* offset is wrong... */
  866. repeat(p, start+1, 1, to);
  867. ASTERN(OOR1, start);
  868. AHEAD(start); /* ... fix it */
  869. EMIT(OOR2, 0);
  870. AHEAD(THERE());
  871. ASTERN(O_CH, THERETHERE());
  872. break;
  873. case REP(1, 1): /* trivial case */
  874. /* done */
  875. break;
  876. case REP(1, N): /* as x?x{1,n-1} */
  877. /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
  878. INSERT(OCH_, start);
  879. ASTERN(OOR1, start);
  880. AHEAD(start);
  881. EMIT(OOR2, 0); /* offset very wrong... */
  882. AHEAD(THERE()); /* ...so fix it */
  883. ASTERN(O_CH, THERETHERE());
  884. copy = dupl(p, start+1, finish+1);
  885. assert(copy == finish+4);
  886. repeat(p, copy, 1, to-1);
  887. break;
  888. case REP(1, INF): /* as x+ */
  889. INSERT(OPLUS_, start);
  890. ASTERN(O_PLUS, start);
  891. break;
  892. case REP(N, N): /* as xx{m-1,n-1} */
  893. copy = dupl(p, start, finish);
  894. repeat(p, copy, from-1, to-1);
  895. break;
  896. case REP(N, INF): /* as xx{n-1,INF} */
  897. copy = dupl(p, start, finish);
  898. repeat(p, copy, from-1, to);
  899. break;
  900. default: /* "can't happen" */
  901. SETERROR(REG_ASSERT); /* just in case */
  902. break;
  903. }
  904. }
  905. /*
  906. - seterr - set an error condition
  907. == static int seterr(register struct parse *p, int e);
  908. */
  909. static int /* useless but makes type checking happy */
  910. seterr(p, e)
  911. register struct parse *p;
  912. int e;
  913. {
  914. if (p->error == 0) /* keep earliest error condition */
  915. p->error = e;
  916. p->next = nuls; /* try to bring things to a halt */
  917. p->end = nuls;
  918. return(0); /* make the return value well-defined */
  919. }
  920. /*
  921. - allocset - allocate a set of characters for []
  922. == static cset *allocset(register struct parse *p);
  923. */
  924. static cset *
  925. allocset(p)
  926. register struct parse *p;
  927. {
  928. register int no = p->g->ncsets++;
  929. register size_t nc;
  930. register size_t nbytes;
  931. register cset *cs;
  932. register size_t css = (size_t)p->g->csetsize;
  933. register int i;
  934. if (no >= p->ncsalloc) { /* need another column of space */
  935. p->ncsalloc += CHAR_BIT;
  936. nc = p->ncsalloc;
  937. assert(nc % CHAR_BIT == 0);
  938. nbytes = nc / CHAR_BIT * css;
  939. if (p->g->sets == NULL)
  940. p->g->sets = (cset *)malloc(nc * sizeof(cset));
  941. else
  942. p->g->sets = (cset *)realloc((char *)p->g->sets,
  943. nc * sizeof(cset));
  944. if (p->g->setbits == NULL)
  945. p->g->setbits = (uch *)malloc(nbytes);
  946. else {
  947. p->g->setbits = (uch *)realloc((char *)p->g->setbits,
  948. nbytes);
  949. /* xxx this isn't right if setbits is now NULL */
  950. for (i = 0; i < no; i++)
  951. p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
  952. }
  953. if (p->g->sets != NULL && p->g->setbits != NULL)
  954. (void) memset((char *)p->g->setbits + (nbytes - css),
  955. 0, css);
  956. else {
  957. no = 0;
  958. SETERROR(REG_ESPACE);
  959. /* caller's responsibility not to do set ops */
  960. }
  961. }
  962. assert(p->g->sets != NULL); /* xxx */
  963. cs = &p->g->sets[no];
  964. cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
  965. cs->mask = 1 << ((no) % CHAR_BIT);
  966. cs->hash = 0;
  967. cs->smultis = 0;
  968. cs->multis = NULL;
  969. return(cs);
  970. }
  971. /*
  972. - freeset - free a now-unused set
  973. == static void freeset(register struct parse *p, register cset *cs);
  974. */
  975. static void
  976. freeset(p, cs)
  977. register struct parse *p;
  978. register cset *cs;
  979. {
  980. register size_t i;
  981. register cset *top = &p->g->sets[p->g->ncsets];
  982. register size_t css = (size_t)p->g->csetsize;
  983. for (i = 0; i < css; i++)
  984. CHsub(cs, i);
  985. if (cs == top-1) /* recover only the easy case */
  986. p->g->ncsets--;
  987. }
  988. /*
  989. - freezeset - final processing on a set of characters
  990. == static int freezeset(register struct parse *p, register cset *cs);
  991. *
  992. * The main task here is merging identical sets. This is usually a waste
  993. * of time (although the hash code minimizes the overhead), but can win
  994. * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
  995. * is done using addition rather than xor -- all ASCII [aA] sets xor to
  996. * the same value!
  997. */
  998. static int /* set number */
  999. freezeset(p, cs)
  1000. register struct parse *p;
  1001. register cset *cs;
  1002. {
  1003. register uch h = cs->hash;
  1004. register size_t i;
  1005. register cset *top = &p->g->sets[p->g->ncsets];
  1006. register cset *cs2;
  1007. register size_t css = (size_t)p->g->csetsize;
  1008. /* look for an earlier one which is the same */
  1009. for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
  1010. if (cs2->hash == h && cs2 != cs) {
  1011. /* maybe */
  1012. for (i = 0; i < css; i++)
  1013. if (!!CHIN(cs2, i) != !!CHIN(cs, i))
  1014. break; /* no */
  1015. if (i == css)
  1016. break; /* yes */
  1017. }
  1018. if (cs2 < top) { /* found one */
  1019. freeset(p, cs);
  1020. cs = cs2;
  1021. }
  1022. return((int)(cs - p->g->sets));
  1023. }
  1024. /*
  1025. - firstch - return first character in a set (which must have at least one)
  1026. == static int firstch(register struct parse *p, register cset *cs);
  1027. */
  1028. static int /* character; there is no "none" value */
  1029. firstch(p, cs)
  1030. register struct parse *p;
  1031. register cset *cs;
  1032. {
  1033. register size_t i;
  1034. register size_t css = (size_t)p->g->csetsize;
  1035. for (i = 0; i < css; i++)
  1036. if (CHIN(cs, i))
  1037. return((char)i);
  1038. assert(never);
  1039. return(0); /* arbitrary */
  1040. }
  1041. /*
  1042. - nch - number of characters in a set
  1043. == static int nch(register struct parse *p, register cset *cs);
  1044. */
  1045. static int
  1046. nch(p, cs)
  1047. register struct parse *p;
  1048. register cset *cs;
  1049. {
  1050. register size_t i;
  1051. register size_t css = (size_t)p->g->csetsize;
  1052. register int n = 0;
  1053. for (i = 0; i < css; i++)
  1054. if (CHIN(cs, i))
  1055. n++;
  1056. return(n);
  1057. }
  1058. /*
  1059. - mcadd - add a collating element to a cset
  1060. == static void mcadd(register struct parse *p, register cset *cs, \
  1061. == register char *cp);
  1062. */
  1063. static void
  1064. mcadd(p, cs, cp)
  1065. register struct parse *p;
  1066. register cset *cs;
  1067. register char *cp;
  1068. {
  1069. register size_t oldend = cs->smultis;
  1070. cs->smultis += strlen(cp) + 1;
  1071. if (cs->multis == NULL)
  1072. cs->multis = malloc(cs->smultis);
  1073. else
  1074. cs->multis = realloc(cs->multis, cs->smultis);
  1075. if (cs->multis == NULL) {
  1076. SETERROR(REG_ESPACE);
  1077. return;
  1078. }
  1079. (void) strcpy(cs->multis + oldend - 1, cp);
  1080. cs->multis[cs->smultis - 1] = '\0';
  1081. }
  1082. #if 0
  1083. /*
  1084. - mcsub - subtract a collating element from a cset
  1085. == static void mcsub(register cset *cs, register char *cp);
  1086. */
  1087. static void
  1088. mcsub(cs, cp)
  1089. register cset *cs;
  1090. register char *cp;
  1091. {
  1092. register char *fp = mcfind(cs, cp);
  1093. register size_t len = strlen(fp);
  1094. assert(fp != NULL);
  1095. (void) memmove(fp, fp + len + 1,
  1096. cs->smultis - (fp + len + 1 - cs->multis));
  1097. cs->smultis -= len;
  1098. if (cs->smultis == 0) {
  1099. free(cs->multis);
  1100. cs->multis = NULL;
  1101. return;
  1102. }
  1103. cs->multis = realloc(cs->multis, cs->smultis);
  1104. assert(cs->multis != NULL);
  1105. }
  1106. /*
  1107. - mcin - is a collating element in a cset?
  1108. == static int mcin(register cset *cs, register char *cp);
  1109. */
  1110. static int
  1111. mcin(cs, cp)
  1112. register cset *cs;
  1113. register char *cp;
  1114. {
  1115. return(mcfind(cs, cp) != NULL);
  1116. }
  1117. /*
  1118. - mcfind - find a collating element in a cset
  1119. == static char *mcfind(register cset *cs, register char *cp);
  1120. */
  1121. static char *
  1122. mcfind(cs, cp)
  1123. register cset *cs;
  1124. register char *cp;
  1125. {
  1126. register char *p;
  1127. if (cs->multis == NULL)
  1128. return(NULL);
  1129. for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
  1130. if (strcmp(cp, p) == 0)
  1131. return(p);
  1132. return(NULL);
  1133. }
  1134. #endif
  1135. /*
  1136. - mcinvert - invert the list of collating elements in a cset
  1137. == static void mcinvert(register struct parse *p, register cset *cs);
  1138. *
  1139. * This would have to know the set of possibilities. Implementation
  1140. * is deferred.
  1141. */
  1142. static void
  1143. mcinvert(p, cs)
  1144. register struct parse *p;
  1145. register cset *cs;
  1146. {
  1147. assert(cs->multis == NULL); /* xxx */
  1148. }
  1149. /*
  1150. - mccase - add case counterparts of the list of collating elements in a cset
  1151. == static void mccase(register struct parse *p, register cset *cs);
  1152. *
  1153. * This would have to know the set of possibilities. Implementation
  1154. * is deferred.
  1155. */
  1156. static void
  1157. mccase(p, cs)
  1158. register struct parse *p;
  1159. register cset *cs;
  1160. {
  1161. assert(cs->multis == NULL); /* xxx */
  1162. }
  1163. /*
  1164. - isinsets - is this character in any sets?
  1165. == static int isinsets(register struct re_guts *g, int c);
  1166. */
  1167. static int /* predicate */
  1168. isinsets(g, c)
  1169. register struct re_guts *g;
  1170. int c;
  1171. {
  1172. register uch *col;
  1173. register int i;
  1174. register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
  1175. register unsigned uc = (unsigned char)c;
  1176. for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
  1177. if (col[uc] != 0)
  1178. return(1);
  1179. return(0);
  1180. }
  1181. /*
  1182. - samesets - are these two characters in exactly the same sets?
  1183. == static int samesets(register struct re_guts *g, int c1, int c2);
  1184. */
  1185. static int /* predicate */
  1186. samesets(g, c1, c2)
  1187. register struct re_guts *g;
  1188. int c1;
  1189. int c2;
  1190. {
  1191. register uch *col;
  1192. register int i;
  1193. register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
  1194. register unsigned uc1 = (unsigned char)c1;
  1195. register unsigned uc2 = (unsigned char)c2;
  1196. for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
  1197. if (col[uc1] != col[uc2])
  1198. return(0);
  1199. return(1);
  1200. }
  1201. /*
  1202. - categorize - sort out character categories
  1203. == static void categorize(struct parse *p, register struct re_guts *g);
  1204. */
  1205. static void
  1206. categorize(p, g)
  1207. struct parse *p;
  1208. register struct re_guts *g;
  1209. {
  1210. register cat_t *cats = g->categories;
  1211. register int c;
  1212. register int c2;
  1213. register cat_t cat;
  1214. /* avoid making error situations worse */
  1215. if (p->error != 0)
  1216. return;
  1217. for (c = CHAR_MIN; c <= CHAR_MAX; c++)
  1218. if (cats[c] == 0 && isinsets(g, c)) {
  1219. cat = g->ncategories++;
  1220. cats[c] = cat;
  1221. for (c2 = c+1; c2 <= CHAR_MAX; c2++)
  1222. if (cats[c2] == 0 && samesets(g, c, c2))
  1223. cats[c2] = cat;
  1224. }
  1225. }
  1226. /*
  1227. - dupl - emit a duplicate of a bunch of sops
  1228. == static sopno dupl(register struct parse *p, sopno start, sopno finish);
  1229. */
  1230. static sopno /* start of duplicate */
  1231. dupl(p, start, finish)
  1232. register struct parse *p;
  1233. sopno start; /* from here */
  1234. sopno finish; /* to this less one */
  1235. {
  1236. register sopno ret = HERE();
  1237. register sopno len = finish - start;
  1238. assert(finish >= start);
  1239. if (len == 0)
  1240. return(ret);
  1241. enlarge(p, p->ssize + len); /* this many unexpected additions */
  1242. assert(p->ssize >= p->slen + len);
  1243. (void) memcpy((char *)(p->strip + p->slen),
  1244. (char *)(p->strip + start), (size_t)len*sizeof(sop));
  1245. p->slen += len;
  1246. return(ret);
  1247. }
  1248. /*
  1249. - doemit - emit a strip operator
  1250. == static void doemit(register struct parse *p, sop op, size_t opnd);
  1251. *
  1252. * It might seem better to implement this as a macro with a function as
  1253. * hard-case backup, but it's just too big and messy unless there are
  1254. * some changes to the data structures. Maybe later.
  1255. */
  1256. static void
  1257. doemit(p, op, opnd)
  1258. register struct parse *p;
  1259. sop op;
  1260. size_t opnd;
  1261. {
  1262. /* avoid making error situations worse */
  1263. if (p->error != 0)
  1264. return;
  1265. /* deal with oversize operands ("can't happen", more or less) */
  1266. assert(opnd < 1<<OPSHIFT);
  1267. /* deal with undersized strip */
  1268. if (p->slen >= p->ssize)
  1269. enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
  1270. assert(p->slen < p->ssize);
  1271. /* finally, it's all reduced to the easy case */
  1272. p->strip[p->slen++] = SOP(op, opnd);
  1273. }
  1274. /*
  1275. - doinsert - insert a sop into the strip
  1276. == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
  1277. */
  1278. static void
  1279. doinsert(p, op, opnd, pos)
  1280. register struct parse *p;
  1281. sop op;
  1282. size_t opnd;
  1283. sopno pos;
  1284. {
  1285. register sopno sn;
  1286. register sop s;
  1287. register int i;
  1288. /* avoid making error situations worse */
  1289. if (p->error != 0)
  1290. return;
  1291. sn = HERE();
  1292. EMIT(op, opnd); /* do checks, ensure space */
  1293. assert(HERE() == sn+1);
  1294. s = p->strip[sn];
  1295. /* adjust paren pointers */
  1296. assert(pos > 0);
  1297. for (i = 1; i < NPAREN; i++) {
  1298. if (p->pbegin[i] >= pos) {
  1299. p->pbegin[i]++;
  1300. }
  1301. if (p->pend[i] >= pos) {
  1302. p->pend[i]++;
  1303. }
  1304. }
  1305. memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
  1306. (HERE()-pos-1)*sizeof(sop));
  1307. p->strip[pos] = s;
  1308. }
  1309. /*
  1310. - dofwd - complete a forward reference
  1311. == static void dofwd(register struct parse *p, sopno pos, sop value);
  1312. */
  1313. static void
  1314. dofwd(p, pos, value)
  1315. register struct parse *p;
  1316. register sopno pos;
  1317. sop value;
  1318. {
  1319. /* avoid making error situations worse */
  1320. if (p->error != 0)
  1321. return;
  1322. assert(value < 1<<OPSHIFT);
  1323. p->strip[pos] = OP(p->strip[pos]) | value;
  1324. }
  1325. /*
  1326. - enlarge - enlarge the strip
  1327. == static void enlarge(register struct parse *p, sopno size);
  1328. */
  1329. static void
  1330. enlarge(p, size)
  1331. register struct parse *p;
  1332. register sopno size;
  1333. {
  1334. register sop *sp;
  1335. if (p->ssize >= size)
  1336. return;
  1337. sp = (sop *)realloc(p->strip, size*sizeof(sop));
  1338. if (sp == NULL) {
  1339. SETERROR(REG_ESPACE);
  1340. return;
  1341. }
  1342. p->strip = sp;
  1343. p->ssize = size;
  1344. }
  1345. /*
  1346. - stripsnug - compact the strip
  1347. == static void stripsnug(register struct parse *p, register struct re_guts *g);
  1348. */
  1349. static void
  1350. stripsnug(p, g)
  1351. register struct parse *p;
  1352. register struct re_guts *g;
  1353. {
  1354. g->nstates = p->slen;
  1355. g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
  1356. if (g->strip == NULL) {
  1357. SETERROR(REG_ESPACE);
  1358. g->strip = p->strip;
  1359. }
  1360. }
  1361. /*
  1362. - findmust - fill in must and mlen with longest mandatory literal string
  1363. == static void findmust(register struct parse *p, register struct re_guts *g);
  1364. *
  1365. * This algorithm could do fancy things like analyzing the operands of |
  1366. * for common subsequences. Someday. This code is simple and finds most
  1367. * of the interesting cases.
  1368. *
  1369. * Note that must and mlen got initialized during setup.
  1370. */
  1371. static void
  1372. findmust(p, g)
  1373. struct parse *p;
  1374. register struct re_guts *g;
  1375. {
  1376. register sop *scan;
  1377. sop *start = NULL;
  1378. register sop *newstart = NULL;
  1379. register sopno newlen;
  1380. register sop s;
  1381. register char *cp;
  1382. register sopno i;
  1383. /* avoid making error situations worse */
  1384. if (p->error != 0)
  1385. return;
  1386. /* find the longest OCHAR sequence in strip */
  1387. newlen = 0;
  1388. scan = g->strip + 1;
  1389. do {
  1390. s = *scan++;
  1391. switch (OP(s)) {
  1392. case OCHAR: /* sequence member */
  1393. if (newlen == 0) /* new sequence */
  1394. newstart = scan - 1;
  1395. newlen++;
  1396. break;
  1397. case OPLUS_: /* things that don't break one */
  1398. case OLPAREN:
  1399. case ORPAREN:
  1400. break;
  1401. case OQUEST_: /* things that must be skipped */
  1402. case OCH_:
  1403. scan--;
  1404. do {
  1405. scan += OPND(s);
  1406. s = *scan;
  1407. /* assert() interferes w debug printouts */
  1408. if (OP(s) != O_QUEST && OP(s) != O_CH &&
  1409. OP(s) != OOR2) {
  1410. g->iflags |= BAD;
  1411. return;
  1412. }
  1413. } while (OP(s) != O_QUEST && OP(s) != O_CH);
  1414. /* fallthrough */
  1415. default: /* things that break a sequence */
  1416. if (newlen > g->mlen) { /* ends one */
  1417. start = newstart;
  1418. g->mlen = newlen;
  1419. }
  1420. newlen = 0;
  1421. break;
  1422. }
  1423. } while (OP(s) != OEND);
  1424. if (g->mlen == 0) /* there isn't one */
  1425. return;
  1426. /* turn it into a character string */
  1427. g->must = malloc((size_t)g->mlen + 1);
  1428. if (g->must == NULL) { /* argh; just forget it */
  1429. g->mlen = 0;
  1430. return;
  1431. }
  1432. cp = g->must;
  1433. scan = start;
  1434. for (i = g->mlen; i > 0; i--) {
  1435. while (OP(s = *scan++) != OCHAR)
  1436. continue;
  1437. assert(cp < g->must + g->mlen);
  1438. *cp++ = (char)OPND(s);
  1439. }
  1440. assert(cp == g->must + g->mlen);
  1441. *cp++ = '\0'; /* just on general principles */
  1442. }
  1443. /*
  1444. - pluscount - count + nesting
  1445. == static sopno pluscount(register struct parse *p, register struct re_guts *g);
  1446. */
  1447. static sopno /* nesting depth */
  1448. pluscount(p, g)
  1449. struct parse *p;
  1450. register struct re_guts *g;
  1451. {
  1452. register sop *scan;
  1453. register sop s;
  1454. register sopno plusnest = 0;
  1455. register sopno maxnest = 0;
  1456. if (p->error != 0)
  1457. return(0); /* there may not be an OEND */
  1458. scan = g->strip + 1;
  1459. do {
  1460. s = *scan++;
  1461. switch (OP(s)) {
  1462. case OPLUS_:
  1463. plusnest++;
  1464. break;
  1465. case O_PLUS:
  1466. if (plusnest > maxnest)
  1467. maxnest = plusnest;
  1468. plusnest--;
  1469. break;
  1470. }
  1471. } while (OP(s) != OEND);
  1472. if (plusnest != 0)
  1473. g->iflags |= BAD;
  1474. return(maxnest);
  1475. }