You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

140 lines
5.4 KiB

28 years ago
28 years ago
28 years ago
28 years ago
28 years ago
28 years ago
28 years ago
28 years ago
28 years ago
  1. /*
  2. * First, the stuff that ends up in the outside-world include file
  3. = #ifdef WIN32
  4. = #define API_EXPORT(type) __declspec(dllexport) type __stdcall
  5. = #else
  6. = #define API_EXPORT(type) type
  7. = #endif
  8. =
  9. = typedef off_t regoff_t;
  10. = typedef struct {
  11. = int re_magic;
  12. = size_t re_nsub; // number of parenthesized subexpressions
  13. = const unsigned char *re_endp; // end pointer for REG_PEND
  14. = struct re_guts *re_g; // none of your business :-)
  15. = } regex_t;
  16. = typedef struct {
  17. = regoff_t rm_so; // start of match
  18. = regoff_t rm_eo; // end of match
  19. = } regmatch_t;
  20. */
  /*
   * internals of regex_t
   */
  /*
   * MAGIC1 is a 16-bit tag: the high byte is 'r' with bit 0200 flipped,
   * the low byte is 'e' (0xF265 on an ASCII host).
   * NOTE(review): presumably the value stored in regex_t's re_magic
   * field -- confirm against regcomp().
   */
  #define MAGIC1 ((('r'^0200)<<8) | 'e')
  /*
   * The internal representation is a *strip*, a sequence of
   * operators ending with an endmarker.  (Some terminology etc. is a
   * historical relic of earlier versions which used multiple strips.)
   * Certain oddities in the representation are there to permit running
   * the machinery backwards; in particular, any deviation from sequential
   * flow must be marked at both its source and its destination.  Some
   * fine points:
   *
   * - OPLUS_ and O_PLUS are *inside* the loop they create.
   * - OQUEST_ and O_QUEST are *outside* the bypass they create.
   * - OCH_ and O_CH are *outside* the multi-way branch they create, while
   *   OOR1 and OOR2 are respectively the end and the beginning of one of
   *   the branches.  Note that there is an implicit OOR2 following OCH_
   *   and an implicit OOR1 preceding O_CH.
   *
   * In state representations, an operator's bit is on to signify a state
   * immediately *preceding* "execution" of that operator.
   */
  typedef long sop;       /* strip operator */
  typedef long sopno;     /* a count of sops, or a position among them */

  /*
   * Layout of one sop: the operator code occupies bits 26..30 (OPRMASK)
   * and the operand occupies bits 0..25 (OPDMASK).  The two masks do not
   * overlap and together cover the low 31 bits.
   */
  #define OPRMASK 0x7c000000
  #define OPDMASK 0x03ffffff
  #define OPSHIFT (26)

  /* OP(n) extracts the operator part of n, still in its shifted position. */
  #define OP(n)   ((n)&OPRMASK)
  /* OPND(n) extracts the operand part of n. */
  #define OPND(n) ((n)&OPDMASK)
  /*
   * SOP(op, opnd) builds a sop.  No masking or shifting is done here:
   * op must already be in operator position and opnd must fit within
   * OPDMASK, otherwise the two fields bleed into each other.
   */
  #define SOP(op, opnd)   ((op)|(opnd))
  /*
   * Operator codes.  Each is a small integer placed in operator position
   * (shifted left by OPSHIFT).  The third column names what the operand
   * part of the sop holds; "-" means the operand is unused, and "back" /
   * "fwd" operands are offsets.
   */
  /* operators                     meaning         operand                 */
  /*                                               (back, fwd are offsets) */
  #define OEND    (1<<OPSHIFT)  /* endmarker       -                       */
  #define OCHAR   (2<<OPSHIFT)  /* character       unsigned char           */
  #define OBOL    (3<<OPSHIFT)  /* left anchor     -                       */
  #define OEOL    (4<<OPSHIFT)  /* right anchor    -                       */
  #define OANY    (5<<OPSHIFT)  /* .               -                       */
  #define OANYOF  (6<<OPSHIFT)  /* [...]           set number              */
  #define OBACK_  (7<<OPSHIFT)  /* begin \d        paren number            */
  #define O_BACK  (8<<OPSHIFT)  /* end \d          paren number            */
  #define OPLUS_  (9<<OPSHIFT)  /* + prefix        fwd to suffix           */
  #define O_PLUS  (10<<OPSHIFT) /* + suffix        back to prefix          */
  #define OQUEST_ (11<<OPSHIFT) /* ? prefix        fwd to suffix           */
  #define O_QUEST (12<<OPSHIFT) /* ? suffix        back to prefix          */
  #define OLPAREN (13<<OPSHIFT) /* (               fwd to )                */
  #define ORPAREN (14<<OPSHIFT) /* )               back to (               */
  #define OCH_    (15<<OPSHIFT) /* begin choice    fwd to OOR2             */
  #define OOR1    (16<<OPSHIFT) /* | pt. 1         back to OOR1 or OCH_    */
  #define OOR2    (17<<OPSHIFT) /* | pt. 2         fwd to OOR2 or O_CH     */
  #define O_CH    (18<<OPSHIFT) /* end choice      back to OOR1            */
  #define OBOW    (19<<OPSHIFT) /* begin word      -                       */
  #define OEOW    (20<<OPSHIFT) /* end word        -                       */
  74. /*
  75. * Structure for [] character-set representation. Character sets are
  76. * done as bit vectors, grouped 8 to a byte vector for compactness.
  77. * The individual set therefore has both a pointer to the byte vector
  78. * and a mask to pick out the relevant bit of each byte. A hash code
  79. * simplifies testing whether two sets could be identical.
  80. *
  81. * This will get trickier for multicharacter collating elements. As
  82. * preliminary hooks for dealing with such things, we also carry along
  83. * a string of multi-character elements, and decide the size of the
  84. * vectors at run time.
  85. */
  86. typedef struct {
  87. uch *ptr; /* -> uch [csetsize] */
  88. uch mask; /* bit within array */
  89. uch hash; /* hash code */
  90. size_t smultis;
  91. unsigned char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
  92. } cset;
  /*
   * Single-character membership operations on a cset.
   *
   * note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1:
   * - CHadd/CHsub expand cs and c more than once, so neither argument may
   *   have side effects; they also adjust cs->hash unconditionally, even
   *   when the bit was already in the requested state.
   * - CHIN yields the masked byte (0 or cs->mask), not a normalised boolean.
   *
   * c is narrowed to uch before being used as the index into cs->ptr.
   */
  #define CHadd(cs, c)    ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
  #define CHsub(cs, c)    ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
  #define CHIN(cs, c)     ((cs)->ptr[(uch)(c)] & (cs)->mask)

  /* Multi-character element operations: thin aliases for regcomp() internal fns. */
  #define MCadd(p, cs, cp)        mcadd(p, cs, cp)        /* regcomp() internal fns */
  #define MCsub(p, cs, cp)        mcsub(p, cs, cp)
  #define MCin(p, cs, cp) mcin(p, cs, cp)
  /* stuff for character categories */
  /* One category value per character; an unsigned 8-bit-or-wider byte. */
  typedef unsigned char cat_t;
  102. /*
  103. * main compiled-expression structure
  104. */
  105. struct re_guts {
  106. int magic;
  107. # define MAGIC2 ((('R'^0200)<<8)|'E')
  108. sop *strip; /* malloced area for strip */
  109. int csetsize; /* number of bits in a cset vector */
  110. int ncsets; /* number of csets in use */
  111. cset *sets; /* -> cset [ncsets] */
  112. uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
  113. int cflags; /* copy of regcomp() cflags argument */
  114. sopno nstates; /* = number of sops */
  115. sopno firststate; /* the initial OEND (normally 0) */
  116. sopno laststate; /* the final OEND */
  117. int iflags; /* internal flags */
  118. # define USEBOL 01 /* used ^ */
  119. # define USEEOL 02 /* used $ */
  120. # define BAD 04 /* something wrong */
  121. int nbol; /* number of ^ used */
  122. int neol; /* number of $ used */
  123. int ncategories; /* how many character categories */
  124. cat_t *categories; /* ->catspace[-UCHAR_MIN] */
  125. unsigned char *must; /* match must contain this string */
  126. int mlen; /* length of must */
  127. size_t nsub; /* copy of re_nsub */
  128. int backrefs; /* does it use back references? */
  129. sopno nplus; /* how deep does it nest +s? */
  130. /* catspace must be last */
  131. cat_t catspace[1]; /* actually [NC] */
  132. };
  /* misc utilities */
  #define OUT     (UCHAR_MAX+1)   /* a non-character value */
  /*
   * ISWORD(c) is 1 when c is a word character (alphanumeric per isalnum(),
   * or '_') and 0 otherwise.
   *
   * The argument is narrowed to unsigned char before it reaches isalnum():
   * the <ctype.h> functions are only defined for values representable as
   * unsigned char (or EOF), so passing a negative plain char or OUT
   * directly would be undefined behaviour.  With the cast, OUT narrows to
   * 0 where UCHAR_MAX is 255.
   *
   * c is expanded twice, so it must not have side effects.
   */
  #define ISWORD(c)       (isalnum((unsigned char)(c)) || (c) == '_')