You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1729 lines
70 KiB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
  1. /*************************************************
  2. * Perl-Compatible Regular Expressions *
  3. *************************************************/
  4. /* PCRE is a library of functions to support regular expressions whose syntax
  5. and semantics are as close as possible to those of the Perl 5 language.
  6. Main Library written by Philip Hazel
  7. Copyright (c) 1997-2012 University of Cambridge
  8. This JIT compiler regression test program was written by Zoltan Herczeg
  9. Copyright (c) 2010-2012
  10. -----------------------------------------------------------------------------
  11. Redistribution and use in source and binary forms, with or without
  12. modification, are permitted provided that the following conditions are met:
  13. * Redistributions of source code must retain the above copyright notice,
  14. this list of conditions and the following disclaimer.
  15. * Redistributions in binary form must reproduce the above copyright
  16. notice, this list of conditions and the following disclaimer in the
  17. documentation and/or other materials provided with the distribution.
  18. * Neither the name of the University of Cambridge nor the names of its
  19. contributors may be used to endorse or promote products derived from
  20. this software without specific prior written permission.
  21. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  25. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31. POSSIBILITY OF SUCH DAMAGE.
  32. -----------------------------------------------------------------------------
  33. */
  34. #ifdef HAVE_CONFIG_H
  35. #include "config.h"
  36. #endif
  37. #include <stdio.h>
  38. #include <string.h>
  39. #include "pcre.h"
  40. #include "pcre_internal.h"
  41. #define PCRE_BUG 0x80000000
  42. /*
  43. Letter characters:
  44. \xe6\x92\xad = 0x64ad = 25773 (kanji)
  45. Non-letter characters:
  46. \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
  47. \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
  48. \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
  49. \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
  50. Newlines:
  51. \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
  52. \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
  53. Othercase pairs:
  54. \xc3\xa9 = 0xe9 = 233 (e')
  55. \xc3\x89 = 0xc9 = 201 (E')
  56. \xc3\xa1 = 0xe1 = 225 (a')
  57. \xc3\x81 = 0xc1 = 193 (A')
  58. \xc8\xba = 0x23a = 570
  59. \xe2\xb1\xa5 = 0x2c65 = 11365
  60. \xe1\xbd\xb8 = 0x1f78 = 8056
  61. \xe1\xbf\xb8 = 0x1ff8 = 8184
  62. \xf0\x90\x90\x80 = 0x10400 = 66560
  63. \xf0\x90\x90\xa8 = 0x10428 = 66600
  64. \xc7\x84 = 0x1c4 = 452
  65. \xc7\x85 = 0x1c5 = 453
  66. \xc7\x86 = 0x1c6 = 454
  67. Mark property:
  68. \xcc\x8d = 0x30d = 781
  69. Special:
  70. \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
  71. \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
  72. \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
  73. \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
  74. \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
  75. \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
  76. */
  /* Forward declaration; main() hands control to this once JIT is confirmed. */
  static int regression_tests(void);

  /*
   * Program entry point.
   *
   * Queries PCRE_CONFIG_JIT through the first library variant that is
   * compiled in (SUPPORT_PCRE8, then SUPPORT_PCRE16, then SUPPORT_PCRE32).
   * If the query does not report JIT support, a message is printed and the
   * process exits with status 1. Otherwise the exit status is whatever
   * regression_tests() returns.
   */
  int main(void)
  {
    /* Stays 0 when no config call sets it, which is treated as "no JIT". */
    int jit_available = 0;

  #if defined(SUPPORT_PCRE8)
    pcre_config(PCRE_CONFIG_JIT, &jit_available);
  #elif defined(SUPPORT_PCRE16)
    pcre16_config(PCRE_CONFIG_JIT, &jit_available);
  #elif defined(SUPPORT_PCRE32)
    pcre32_config(PCRE_CONFIG_JIT, &jit_available);
  #endif

    if (jit_available)
      return regression_tests();

    printf("JIT must be enabled to run pcre_jit_test\n");
    return 1;
  }
  94. /* --------------------------------------------------------------------------------------- */
  95. #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
  96. #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
  97. #endif
  /*
   * Shorthand option sets used in the first column of the test table.
   * Each name spells out the options it combines:
   *   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   *   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
   * Every set is built on MA, so the values match the fully spelled-out
   * OR lists.
   */
  #define MA    (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
  #define MAP   (MA | PCRE_UCP)
  #define CMA   (PCRE_CASELESS | MA)
  #define MUA   (MA | PCRE_UTF8)
  #define MUAP  (MUA | PCRE_UCP)
  #define CMUA  (PCRE_CASELESS | MUA)
  #define CMUAP (CMUA | PCRE_UCP)
  /*
   * Bit layout of a test case's start_offset field: OFFSET_MASK covers the
   * low 16 bits, and the F_* flags sit in the bits above it (table rows
   * combine them as e.g. `1 | F_NOMATCH`).
   */
  #define OFFSET_MASK ((1 << 16) - 1)
  #define F_NO8       (1 << 16)
  #define F_NO16      (1 << 17)
  /* NOTE(review): F_NO32 has the same value as F_NO16 - confirm the aliasing is intended. */
  #define F_NO32      (1 << 17)
  #define F_NOMATCH   (1 << 18)
  #define F_DIFF      (1 << 19)
  #define F_FORCECONV (1 << 20)
  #define F_PROPERTY  (1 << 21)
  #define F_STUDY     (1 << 22)
  /*
   * One row of the regression test table. Rows are written with positional
   * initialisers, so the member order below must not change.
   */
  struct regression_test_case {
    /* PCRE option bits for this case: PCRE_* options or a shorthand set
       such as MUA or CMA. */
    int flags;
    /* Start offset OR-ed with F_* flags (for example `1 | F_NOMATCH`). */
    int start_offset;
    /* Regular expression, as a C string. */
    const char *pattern;
    /* Subject text, as a C string. */
    const char *input;
  };
  120. static struct regression_test_case regression_test_cases[] = {
  121. /* Constant strings. */
  122. { MUA, 0, "AbC", "AbAbC" },
  123. { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
  124. { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
  125. { MA, 0, "[^a]", "aAbB" },
  126. { CMA, 0, "[^m]", "mMnN" },
  127. { MA, 0, "a[^b][^#]", "abacd" },
  128. { CMA, 0, "A[^B][^E]", "abacd" },
  129. { CMUA, 0, "[^x][^#]", "XxBll" },
  130. { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
  131. { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
  132. { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
  133. { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
  134. { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
  135. { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
  136. { MUA, 0, "[axd]", "sAXd" },
  137. { CMUA, 0, "[axd]", "sAXd" },
  138. { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
  139. { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
  140. { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
  141. { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
  142. { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
  143. { MUA, 0, "[^a]", "\xc2\x80[]" },
  144. { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
  145. { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
  146. { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
  147. { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
  148. { PCRE_CASELESS, 0, "a1", "Aa1" },
  149. { MA, 0, "\\Ca", "cda" },
  150. { CMA, 0, "\\Ca", "CDA" },
  151. { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
  152. { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
  153. { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
  154. { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
  155. { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
  156. { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
  157. /* Assertions. */
  158. { MUA, 0, "\\b[^A]", "A_B#" },
  159. { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
  160. { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
  161. { MAP, 0, "\\B", "_\xa1" },
  162. { MAP, 0, "\\b_\\b[,A]\\B", "_," },
  163. { MUAP, 0, "\\b", "\xe6\x92\xad!" },
  164. { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
  165. { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
  166. { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
  167. { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
  168. { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
  169. { MA, 0 | F_NOMATCH, "\\R^", "\n" },
  170. { MA, 1 | F_NOMATCH, "^", "\n" },
  171. { 0, 0, "^ab", "ab" },
  172. { 0, 0 | F_NOMATCH, "^ab", "aab" },
  173. { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
  174. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
  175. { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
  176. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
  177. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
  178. { 0, 0, "ab$", "ab" },
  179. { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
  180. { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
  181. { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
  182. { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
  183. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
  184. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
  185. { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
  186. { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
  187. { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
  188. { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
  189. { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
  190. { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
  191. { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
  192. { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
  193. { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
  194. { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
  195. { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
  196. { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
  197. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
  198. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
  199. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
  200. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
  201. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
  202. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
  203. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
  204. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
  205. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
  206. { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
  207. { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
  208. { MA, 0, "\\Aa", "aaa" },
  209. { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
  210. { MA, 1, "\\Ga", "aaa" },
  211. { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
  212. { MA, 0, "a\\z", "aaa" },
  213. { MA, 0 | F_NOMATCH, "a\\z", "aab" },
  214. /* Brackets. */
  215. { MUA, 0, "(ab|bb|cd)", "bacde" },
  216. { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
  217. { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
  218. { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
  219. { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
  220. { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
  221. /* Greedy and non-greedy ? operators. */
  222. { MUA, 0, "(?:a)?a", "laab" },
  223. { CMUA, 0, "(A)?A", "llaab" },
  224. { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
  225. { MUA, 0, "(a)?a", "manm" },
  226. { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
  227. { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
  228. { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
  229. /* Greedy and non-greedy + operators */
  230. { MUA, 0, "(aa)+aa", "aaaaaaa" },
  231. { MUA, 0, "(aa)+?aa", "aaaaaaa" },
  232. { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
  233. { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
  234. { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
  235. { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
  236. { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
  237. /* Greedy and non-greedy * operators */
  238. { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
  239. { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
  240. { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
  241. { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
  242. { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
  243. { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
  244. { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
  245. { MA, 0, "((?:a|)*){0}a", "a" },
  246. /* Combining ? + * operators */
  247. { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
  248. { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
  249. { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
  250. { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
  251. { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
  252. /* Single character iterators. */
  253. { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
  254. { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
  255. { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
  256. { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
  257. { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
  258. { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
  259. { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
  260. { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
  261. { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
  262. { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
  263. { MUA, 0, "(a?+[^b])+", "babaacacb" },
  264. { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
  265. { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
  266. { CMUA, 0, "[c-f]+k", "DemmFke" },
  267. { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
  268. { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
  269. { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
  270. { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
  271. { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
  272. { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
  273. { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
  274. { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
  275. { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
  276. { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
  277. { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
  278. { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
  279. { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
  280. { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
  281. { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
  282. { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
  283. { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
  284. { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
  285. /* Bracket repeats with limit. */
  286. { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
  287. { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
  288. { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
  289. { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
  290. { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
  291. { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
  292. { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
  293. { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
  294. { MUA, 0, "(ab){4,6}?M", "abababababababM" },
  295. /* Basic character sets. */
  296. { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
  297. { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
  298. { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
  299. { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
  300. { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
  301. { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
  302. { MUA, 0, "x[bcef]+", "xaxdxecbfg" },
  303. { MUA, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
  304. { MUA, 0, "x[^befg]+", "xbxexacdhg" },
  305. { MUA, 0, "x[^bcdl]+", "xlxbxaekmd" },
  306. { MUA, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
  307. { MUA, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
  308. { CMUA, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
  309. { CMUA, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
  310. { MUA, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
  311. { MUA, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
  312. { MUA, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
  313. { MUA, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
  314. { MUA, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
  315. { MUA, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
  316. { MUA, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
  317. { MUA, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
  318. /* Unicode properties. */
  319. { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
  320. { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
  321. { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
  322. { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
  323. { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
  324. { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
  325. { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
  326. { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
  327. { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
  328. { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
  329. { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
  330. { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
  331. { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
  332. { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
  333. { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
  334. { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
  335. { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
  336. { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
  337. { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
  338. { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
  339. /* Possible empty brackets. */
  340. { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
  341. { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
  342. { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
  343. { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
  344. { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
  345. { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
  346. { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
  347. { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
  348. { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
  349. { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
  350. /* Start offset. */
  351. { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
  352. { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
  353. { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
  354. { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
  355. /* Newline. */
  356. { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
  357. { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
  358. { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
  359. { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
  360. { MUA, 1, "^", "\r\n" },
  361. { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
  362. { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
  363. /* Any character except newline or any newline. */
  364. { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
  365. { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
  366. { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
  367. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
  368. { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
  369. { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
  370. { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
  371. { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
  372. { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
  373. { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
  374. { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
  375. { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
  376. { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
  377. { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
  378. { MUA, 0, "\\R+", "ab\r\n\r" },
  379. { MUA, 0, "\\R*", "ab\r\n\r" },
  380. { MUA, 0, "\\R*", "\r\n\r" },
  381. { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
  382. { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
  383. { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
  384. { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
  385. { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
  386. { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
  387. { MUA, 0, "\\R*\\R\\R", "\n\r" },
  388. { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
  389. { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
  390. /* Atomic groups (no fallback from "next" direction). */
  391. { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
  392. { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
  393. { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
  394. "bababcdedefgheijijklmlmnop" },
  395. { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
  396. { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
  397. { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
  398. { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
  399. { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
  400. { MUA, 0, "(?>x|)*$", "aaa" },
  401. { MUA, 0, "(?>(x)|)*$", "aaa" },
  402. { MUA, 0, "(?>x|())*$", "aaa" },
  403. { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
  404. { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
  405. { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
  406. { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
  407. { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
  408. { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
  409. { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
  410. { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
  411. { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
  412. { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
  413. { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
  414. { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
  415. { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
  416. { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
  417. { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
  418. { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
  419. { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
  420. { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
  421. { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
  422. { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
  423. { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
  424. { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
  425. { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
  426. { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
  427. { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
  428. { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
  429. /* Possessive quantifiers. */
  430. { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
  431. { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
  432. { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
  433. { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
  434. { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
  435. { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
  436. { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
  437. { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
  438. { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
  439. { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
  440. { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
  441. { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
  442. { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
  443. { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
  444. { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
  445. { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
  446. { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
  447. { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
  448. { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
  449. { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
  450. { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
  451. { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
  452. { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
  453. { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
  454. { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
  455. { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
  456. { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
  457. { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
  458. { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
  459. { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
  460. { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
  461. { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
  462. { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
  463. { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
  464. { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
  465. /* Back references. */
  466. { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
  467. { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
  468. { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
  469. { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
  470. { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
  471. { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
  472. { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
  473. { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
  474. { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
  475. { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
  476. { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
  477. { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
  478. { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
  479. { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
  480. { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
  481. { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
  482. { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
  483. { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
  484. { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
  485. { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
  486. { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
  487. { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
  488. { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
  489. { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
  490. { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
  491. { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
  492. { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
  493. { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
  494. { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
  495. { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
  496. { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
  497. { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
  498. { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
  499. { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
  500. { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
  501. { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
  502. { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
  503. { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
  504. { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
  505. /* Assertions. */
  506. { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
  507. { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
  508. { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
  509. { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
  510. { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
  511. { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
  512. { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
  513. { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
  514. { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
  515. { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
  516. { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
  517. { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
  518. { MUA, 0, "((?(?=a)a)+k)", "bbak" },
  519. { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
  520. { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
  521. { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
  522. { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
  523. { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
  524. { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
  525. { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
  526. { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
  527. { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
  528. { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
  529. { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
  530. /* Not empty, ACCEPT, FAIL */
  531. { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
  532. { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
  533. { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
  534. { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
  535. { MUA, 0, "a(*ACCEPT)b", "ab" },
  536. { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
  537. { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
  538. { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
  539. { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
  540. { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
  541. { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
  542. { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
  543. { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
  544. { MUA, 0, "((a(*ACCEPT)b))", "ab" },
  545. { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
  546. { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
  547. { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
  548. { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
  549. { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
  550. /* Conditional blocks. */
  551. { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
  552. { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
  553. { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
  554. { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
  555. { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
  556. { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
  557. { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
  558. { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
  559. { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
  560. { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
  561. { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
  562. { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
  563. { MUA, 0, "(?(?=a)ab)", "a" },
  564. { MUA, 0, "(?(?<!b)c)", "b" },
  565. { MUA, 0, "(?(DEFINE)a(b))", "a" },
  566. { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
  567. { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
  568. { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
  569. { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
  570. { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
  571. { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
  572. { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
  573. { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
  574. { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
  575. { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
  576. { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
  577. { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
  578. { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
  579. { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
  580. { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
  581. { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
  582. { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
  583. { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
  584. { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
  585. { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
  586. /* Set start of match. */
  587. { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
  588. { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
  589. { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
  590. { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
  591. { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
  592. /* First line. */
  593. { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
  594. { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
  595. { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
  596. { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
  597. { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
  598. { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
  599. { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
  600. { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
  601. { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
  602. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
  603. { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
  604. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
  605. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
  606. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
  607. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
  608. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
  609. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
  610. { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
  611. { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
  612. { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
  613. /* Recurse. */
  614. { MUA, 0, "(a)(?1)", "aa" },
  615. { MUA, 0, "((a))(?1)", "aa" },
  616. { MUA, 0, "(b|a)(?1)", "aa" },
  617. { MUA, 0, "(b|(a))(?1)", "aa" },
  618. { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
  619. { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
  620. { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
  621. { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
  622. { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
  623. { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
  624. { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
  625. { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
  626. { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
  627. { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
  628. { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
  629. { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
  630. { MUA, 0, "b|<(?R)*>", "<<b>" },
  631. { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
  632. { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
  633. { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
  634. { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
  635. { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
  636. { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
  637. { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
  638. { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
  639. { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
  640. { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
  641. { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
  642. { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
  643. /* 16 bit specific tests. */
  644. { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
  645. { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
  646. { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
  647. { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
  648. { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
  649. { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
  650. { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
  651. { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
  652. { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
  653. { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
  654. { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
  655. { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
  656. { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
  657. { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
  658. { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
  659. { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
  660. { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
  661. { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
  662. { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
  663. { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
  664. { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
  665. { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
  666. { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
  667. { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
  668. { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
  669. { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
  670. { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
  671. { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
  672. { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
  673. { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
  674. { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
  675. /* Partial matching. */
  676. { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
  677. { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
  678. { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
  679. { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
  680. { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
  681. { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
  682. { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
  683. { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
  684. /* (*MARK) verb. */
  685. { MUA, 0, "a(*MARK:aa)a", "ababaa" },
  686. { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
  687. { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
  688. { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
  689. { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
  690. { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
  691. { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
  692. { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
  693. { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
  694. { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
  695. { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
  696. { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
  697. { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
  698. { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
  699. { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
  700. /* (*COMMIT) verb. */
  701. { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
  702. { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
  703. { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
  704. { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
  705. { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
  706. { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
  707. /* (*PRUNE) verb. */
  708. { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
  709. { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
  710. { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
  711. { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
  712. { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
  713. { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
  714. { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
  715. { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
  716. { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
  717. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
  718. { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
  719. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
  720. { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
  721. { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
  722. { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
  723. { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
  724. { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
  725. { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
  726. { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
  727. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
  728. { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
  729. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
  730. { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
  731. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
  732. { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
  733. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
  734. { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
  735. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
  736. { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
  737. { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
  738. /* (*SKIP) verb. */
  739. { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
  740. /* (*THEN) verb. */
  741. { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
  742. { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
  743. { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
  744. { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
  745. { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
  746. { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
  747. { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
  748. { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
  749. { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
  750. /* Deep recursion. */
  751. { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
  752. { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
  753. { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
  754. /* Deep recursion: Stack limit reached. */
  755. { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
  756. { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
  757. { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
  758. { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
  759. { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
  760. { 0, 0, NULL, NULL }
  761. };
  762. static const unsigned char *tables(int mode)
  763. {
  764. /* The purpose of this function to allow valgrind
  765. for reporting invalid reads and writes. */
  766. static unsigned char *tables_copy;
  767. const char *errorptr;
  768. int erroroffset;
  769. unsigned char *default_tables;
  770. #if defined SUPPORT_PCRE8
  771. pcre *regex;
  772. char null_str[1] = { 0 };
  773. #elif defined SUPPORT_PCRE16
  774. pcre16 *regex;
  775. PCRE_UCHAR16 null_str[1] = { 0 };
  776. #elif defined SUPPORT_PCRE32
  777. pcre32 *regex;
  778. PCRE_UCHAR32 null_str[1] = { 0 };
  779. #endif
  780. if (mode) {
  781. if (tables_copy)
  782. free(tables_copy);
  783. tables_copy = NULL;
  784. return NULL;
  785. }
  786. if (tables_copy)
  787. return tables_copy;
  788. default_tables = NULL;
  789. #if defined SUPPORT_PCRE8
  790. regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
  791. if (regex) {
  792. pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
  793. pcre_free(regex);
  794. }
  795. #elif defined SUPPORT_PCRE16
  796. regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
  797. if (regex) {
  798. pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
  799. pcre16_free(regex);
  800. }
  801. #elif defined SUPPORT_PCRE32
  802. regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
  803. if (regex) {
  804. pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
  805. pcre32_free(regex);
  806. }
  807. #endif
  808. /* Shouldn't ever happen. */
  809. if (!default_tables)
  810. return NULL;
  811. /* Unfortunately this value cannot get from pcre_fullinfo.
  812. Since this is a test program, this is acceptable at the moment. */
  813. tables_copy = (unsigned char *)malloc(1088);
  814. if (!tables_copy)
  815. return NULL;
  816. memcpy(tables_copy, default_tables, 1088);
  817. return tables_copy;
  818. }
  819. #ifdef SUPPORT_PCRE8
  820. static pcre_jit_stack* callback8(void *arg)
  821. {
  822. return (pcre_jit_stack *)arg;
  823. }
  824. #endif
  825. #ifdef SUPPORT_PCRE16
  826. static pcre16_jit_stack* callback16(void *arg)
  827. {
  828. return (pcre16_jit_stack *)arg;
  829. }
  830. #endif
  831. #ifdef SUPPORT_PCRE32
  832. static pcre32_jit_stack* callback32(void *arg)
  833. {
  834. return (pcre32_jit_stack *)arg;
  835. }
  836. #endif
  837. #ifdef SUPPORT_PCRE8
  838. static pcre_jit_stack *stack8;
  839. static pcre_jit_stack *getstack8(void)
  840. {
  841. if (!stack8)
  842. stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
  843. return stack8;
  844. }
  845. static void setstack8(pcre_extra *extra)
  846. {
  847. if (!extra) {
  848. if (stack8)
  849. pcre_jit_stack_free(stack8);
  850. stack8 = NULL;
  851. return;
  852. }
  853. pcre_assign_jit_stack(extra, callback8, getstack8());
  854. }
  855. #endif /* SUPPORT_PCRE8 */
  856. #ifdef SUPPORT_PCRE16
  857. static pcre16_jit_stack *stack16;
  858. static pcre16_jit_stack *getstack16(void)
  859. {
  860. if (!stack16)
  861. stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
  862. return stack16;
  863. }
  864. static void setstack16(pcre16_extra *extra)
  865. {
  866. if (!extra) {
  867. if (stack16)
  868. pcre16_jit_stack_free(stack16);
  869. stack16 = NULL;
  870. return;
  871. }
  872. pcre16_assign_jit_stack(extra, callback16, getstack16());
  873. }
  874. #endif /* SUPPORT_PCRE8 */
  875. #ifdef SUPPORT_PCRE32
  876. static pcre32_jit_stack *stack32;
  877. static pcre32_jit_stack *getstack32(void)
  878. {
  879. if (!stack32)
  880. stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
  881. return stack32;
  882. }
  883. static void setstack32(pcre32_extra *extra)
  884. {
  885. if (!extra) {
  886. if (stack32)
  887. pcre32_jit_stack_free(stack32);
  888. stack32 = NULL;
  889. return;
  890. }
  891. pcre32_assign_jit_stack(extra, callback32, getstack32());
  892. }
  893. #endif /* SUPPORT_PCRE8 */
  894. #ifdef SUPPORT_PCRE16
  895. static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
  896. {
  897. unsigned char *iptr = (unsigned char*)input;
  898. PCRE_UCHAR16 *optr = output;
  899. unsigned int c;
  900. if (max_length == 0)
  901. return 0;
  902. while (*iptr && max_length > 1) {
  903. c = 0;
  904. if (offsetmap)
  905. *offsetmap++ = (int)(iptr - (unsigned char*)input);
  906. if (*iptr < 0xc0)
  907. c = *iptr++;
  908. else if (!(*iptr & 0x20)) {
  909. c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
  910. iptr += 2;
  911. } else if (!(*iptr & 0x10)) {
  912. c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
  913. iptr += 3;
  914. } else if (!(*iptr & 0x08)) {
  915. c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
  916. iptr += 4;
  917. }
  918. if (c < 65536) {
  919. *optr++ = c;
  920. max_length--;
  921. } else if (max_length <= 2) {
  922. *optr = '\0';
  923. return (int)(optr - output);
  924. } else {
  925. c -= 0x10000;
  926. *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
  927. *optr++ = 0xdc00 | (c & 0x3ff);
  928. max_length -= 2;
  929. if (offsetmap)
  930. offsetmap++;
  931. }
  932. }
  933. if (offsetmap)
  934. *offsetmap = (int)(iptr - (unsigned char*)input);
  935. *optr = '\0';
  936. return (int)(optr - output);
  937. }
  938. static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
  939. {
  940. unsigned char *iptr = (unsigned char*)input;
  941. PCRE_UCHAR16 *optr = output;
  942. if (max_length == 0)
  943. return 0;
  944. while (*iptr && max_length > 1) {
  945. *optr++ = *iptr++;
  946. max_length--;
  947. }
  948. *optr = '\0';
  949. return (int)(optr - output);
  950. }
  951. #define REGTEST_MAX_LENGTH16 4096
  952. static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
  953. static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
  954. #endif /* SUPPORT_PCRE16 */
  955. #ifdef SUPPORT_PCRE32
  956. static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
  957. {
  958. unsigned char *iptr = (unsigned char*)input;
  959. PCRE_UCHAR32 *optr = output;
  960. unsigned int c;
  961. if (max_length == 0)
  962. return 0;
  963. while (*iptr && max_length > 1) {
  964. c = 0;
  965. if (offsetmap)
  966. *offsetmap++ = (int)(iptr - (unsigned char*)input);
  967. if (*iptr < 0xc0)
  968. c = *iptr++;
  969. else if (!(*iptr & 0x20)) {
  970. c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
  971. iptr += 2;
  972. } else if (!(*iptr & 0x10)) {
  973. c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
  974. iptr += 3;
  975. } else if (!(*iptr & 0x08)) {
  976. c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
  977. iptr += 4;
  978. }
  979. *optr++ = c;
  980. max_length--;
  981. }
  982. if (offsetmap)
  983. *offsetmap = (int)(iptr - (unsigned char*)input);
  984. *optr = 0;
  985. return (int)(optr - output);
  986. }
  987. static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
  988. {
  989. unsigned char *iptr = (unsigned char*)input;
  990. PCRE_UCHAR32 *optr = output;
  991. if (max_length == 0)
  992. return 0;
  993. while (*iptr && max_length > 1) {
  994. *optr++ = *iptr++;
  995. max_length--;
  996. }
  997. *optr = '\0';
  998. return (int)(optr - output);
  999. }
  1000. #define REGTEST_MAX_LENGTH32 4096
  1001. static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
  1002. static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
  1003. #endif /* SUPPORT_PCRE32 */
  1004. static int check_ascii(const char *input)
  1005. {
  1006. const unsigned char *ptr = (unsigned char *)input;
  1007. while (*ptr) {
  1008. if (*ptr > 127)
  1009. return 0;
  1010. ptr++;
  1011. }
  1012. return 1;
  1013. }
  1014. static int regression_tests(void)
  1015. {
  1016. struct regression_test_case *current = regression_test_cases;
  1017. const char *error;
  1018. char *cpu_info;
  1019. int i, err_offs;
  1020. int is_successful, is_ascii;
  1021. int total = 0;
  1022. int successful = 0;
  1023. int successful_row = 0;
  1024. int counter = 0;
  1025. int study_mode;
  1026. int utf = 0, ucp = 0;
  1027. int disabled_flags = 0;
  1028. #ifdef SUPPORT_PCRE8
  1029. pcre *re8;
  1030. pcre_extra *extra8;
  1031. pcre_extra dummy_extra8;
  1032. int ovector8_1[32];
  1033. int ovector8_2[32];
  1034. int return_value8[2];
  1035. unsigned char *mark8_1, *mark8_2;
  1036. #endif
  1037. #ifdef SUPPORT_PCRE16
  1038. pcre16 *re16;
  1039. pcre16_extra *extra16;
  1040. pcre16_extra dummy_extra16;
  1041. int ovector16_1[32];
  1042. int ovector16_2[32];
  1043. int return_value16[2];
  1044. PCRE_UCHAR16 *mark16_1, *mark16_2;
  1045. int length16;
  1046. #endif
  1047. #ifdef SUPPORT_PCRE32
  1048. pcre32 *re32;
  1049. pcre32_extra *extra32;
  1050. pcre32_extra dummy_extra32;
  1051. int ovector32_1[32];
  1052. int ovector32_2[32];
  1053. int return_value32[2];
  1054. PCRE_UCHAR32 *mark32_1, *mark32_2;
  1055. int length32;
  1056. #endif
  1057. /* This test compares the behaviour of interpreter and JIT. Although disabling
  1058. utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
  1059. still considered successful from pcre_jit_test point of view. */
  1060. #if defined SUPPORT_PCRE8
  1061. pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
  1062. #elif defined SUPPORT_PCRE16
  1063. pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
  1064. #elif defined SUPPORT_PCRE32
  1065. pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
  1066. #endif
  1067. printf("Running JIT regression tests\n");
  1068. printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
  1069. #if defined SUPPORT_PCRE8
  1070. pcre_config(PCRE_CONFIG_UTF8, &utf);
  1071. pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
  1072. #elif defined SUPPORT_PCRE16
  1073. pcre16_config(PCRE_CONFIG_UTF16, &utf);
  1074. pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
  1075. #elif defined SUPPORT_PCRE16
  1076. pcre32_config(PCRE_CONFIG_UTF32, &utf);
  1077. pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
  1078. #endif
  1079. if (!utf)
  1080. disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
  1081. if (!ucp)
  1082. disabled_flags |= PCRE_UCP;
  1083. #ifdef SUPPORT_PCRE8
  1084. printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
  1085. #endif
  1086. #ifdef SUPPORT_PCRE16
  1087. printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
  1088. #endif
  1089. #ifdef SUPPORT_PCRE32
  1090. printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
  1091. #endif
  1092. while (current->pattern) {
  1093. /* printf("\nPattern: %s :\n", current->pattern); */
  1094. total++;
  1095. is_ascii = 0;
  1096. if (!(current->start_offset & F_PROPERTY))
  1097. is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
  1098. if (current->flags & PCRE_PARTIAL_SOFT)
  1099. study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
  1100. else if (current->flags & PCRE_PARTIAL_HARD)
  1101. study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
  1102. else
  1103. study_mode = PCRE_STUDY_JIT_COMPILE;
  1104. error = NULL;
  1105. #ifdef SUPPORT_PCRE8
  1106. re8 = NULL;
  1107. if (!(current->start_offset & F_NO8))
  1108. re8 = pcre_compile(current->pattern,
  1109. current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
  1110. &error, &err_offs, tables(0));
  1111. extra8 = NULL;
  1112. if (re8) {
  1113. error = NULL;
  1114. extra8 = pcre_study(re8, study_mode, &error);
  1115. if (!extra8) {
  1116. printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
  1117. pcre_free(re8);
  1118. re8 = NULL;
  1119. }
  1120. else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
  1121. printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
  1122. pcre_free_study(extra8);
  1123. pcre_free(re8);
  1124. re8 = NULL;
  1125. }
  1126. extra8->flags |= PCRE_EXTRA_MARK;
  1127. } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO8))
  1128. printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
  1129. #endif
  1130. #ifdef SUPPORT_PCRE16
  1131. if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
  1132. convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
  1133. else
  1134. copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
  1135. re16 = NULL;
  1136. if (!(current->start_offset & F_NO16))
  1137. re16 = pcre16_compile(regtest_buf16,
  1138. current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
  1139. &error, &err_offs, tables(0));
  1140. extra16 = NULL;
  1141. if (re16) {
  1142. error = NULL;
  1143. extra16 = pcre16_study(re16, study_mode, &error);
  1144. if (!extra16) {
  1145. printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
  1146. pcre16_free(re16);
  1147. re16 = NULL;
  1148. }
  1149. else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
  1150. printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
  1151. pcre16_free_study(extra16);
  1152. pcre16_free(re16);
  1153. re16 = NULL;
  1154. }
  1155. extra16->flags |= PCRE_EXTRA_MARK;
  1156. } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO16))
  1157. printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
  1158. #endif
  1159. #ifdef SUPPORT_PCRE32
  1160. if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
  1161. convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
  1162. else
  1163. copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
  1164. re32 = NULL;
  1165. if (!(current->start_offset & F_NO32))
  1166. re32 = pcre32_compile(regtest_buf32,
  1167. current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
  1168. &error, &err_offs, tables(0));
  1169. extra32 = NULL;
  1170. if (re32) {
  1171. error = NULL;
  1172. extra32 = pcre32_study(re32, study_mode, &error);
  1173. if (!extra32) {
  1174. printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
  1175. pcre32_free(re32);
  1176. re32 = NULL;
  1177. }
  1178. if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
  1179. printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
  1180. pcre32_free_study(extra32);
  1181. pcre32_free(re32);
  1182. re32 = NULL;
  1183. }
  1184. extra32->flags |= PCRE_EXTRA_MARK;
  1185. } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO32))
  1186. printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
  1187. #endif
  1188. counter++;
  1189. if ((counter & 0x3) != 0) {
  1190. #ifdef SUPPORT_PCRE8
  1191. setstack8(NULL);
  1192. #endif
  1193. #ifdef SUPPORT_PCRE16
  1194. setstack16(NULL);
  1195. #endif
  1196. #ifdef SUPPORT_PCRE32
  1197. setstack32(NULL);
  1198. #endif
  1199. }
  1200. #ifdef SUPPORT_PCRE8
  1201. return_value8[0] = -1000;
  1202. return_value8[1] = -1000;
  1203. for (i = 0; i < 32; ++i)
  1204. ovector8_1[i] = -2;
  1205. for (i = 0; i < 32; ++i)
  1206. ovector8_2[i] = -2;
  1207. if (re8) {
  1208. mark8_1 = NULL;
  1209. mark8_2 = NULL;
  1210. extra8->mark = &mark8_1;
  1211. if ((counter & 0x1) != 0) {
  1212. setstack8(extra8);
  1213. return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
  1214. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
  1215. } else
  1216. return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
  1217. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
  1218. memset(&dummy_extra8, 0, sizeof(pcre_extra));
  1219. dummy_extra8.flags = PCRE_EXTRA_MARK;
  1220. if (current->start_offset & F_STUDY) {
  1221. dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
  1222. dummy_extra8.study_data = extra8->study_data;
  1223. }
  1224. dummy_extra8.mark = &mark8_2;
  1225. return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
  1226. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
  1227. }
  1228. #endif
  1229. #ifdef SUPPORT_PCRE16
  1230. return_value16[0] = -1000;
  1231. return_value16[1] = -1000;
  1232. for (i = 0; i < 32; ++i)
  1233. ovector16_1[i] = -2;
  1234. for (i = 0; i < 32; ++i)
  1235. ovector16_2[i] = -2;
  1236. if (re16) {
  1237. mark16_1 = NULL;
  1238. mark16_2 = NULL;
  1239. if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
  1240. length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
  1241. else
  1242. length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
  1243. extra16->mark = &mark16_1;
  1244. if ((counter & 0x1) != 0) {
  1245. setstack16(extra16);
  1246. return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
  1247. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
  1248. } else
  1249. return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
  1250. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
  1251. memset(&dummy_extra16, 0, sizeof(pcre16_extra));
  1252. dummy_extra16.flags = PCRE_EXTRA_MARK;
  1253. if (current->start_offset & F_STUDY) {
  1254. dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
  1255. dummy_extra16.study_data = extra16->study_data;
  1256. }
  1257. dummy_extra16.mark = &mark16_2;
  1258. return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
  1259. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
  1260. }
  1261. #endif
  1262. #ifdef SUPPORT_PCRE32
  1263. return_value32[0] = -1000;
  1264. return_value32[1] = -1000;
  1265. for (i = 0; i < 32; ++i)
  1266. ovector32_1[i] = -2;
  1267. for (i = 0; i < 32; ++i)
  1268. ovector32_2[i] = -2;
  1269. if (re32) {
  1270. mark32_1 = NULL;
  1271. mark32_2 = NULL;
  1272. if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
  1273. length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
  1274. else
  1275. length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
  1276. extra32->mark = &mark32_1;
  1277. if ((counter & 0x1) != 0) {
  1278. setstack32(extra32);
  1279. return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
  1280. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
  1281. } else
  1282. return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
  1283. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
  1284. memset(&dummy_extra32, 0, sizeof(pcre32_extra));
  1285. dummy_extra32.flags = PCRE_EXTRA_MARK;
  1286. if (current->start_offset & F_STUDY) {
  1287. dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
  1288. dummy_extra32.study_data = extra32->study_data;
  1289. }
  1290. dummy_extra32.mark = &mark32_2;
  1291. return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
  1292. current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
  1293. }
  1294. #endif
  1295. /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
  1296. return_value8[0], return_value16[0], return_value32[0],
  1297. ovector8_1[0], ovector8_1[1],
  1298. ovector16_1[0], ovector16_1[1],
  1299. ovector32_1[0], ovector32_1[1],
  1300. (current->flags & PCRE_CASELESS) ? "C" : ""); */
  1301. /* If F_DIFF is set, just run the test, but do not compare the results.
  1302. Segfaults can still be captured. */
  1303. is_successful = 1;
  1304. if (!(current->start_offset & F_DIFF)) {
  1305. #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
  1306. if (!(current->start_offset & F_FORCECONV)) {
  1307. int return_value;
  1308. /* All results must be the same. */
  1309. #ifdef SUPPORT_PCRE8
  1310. if ((return_value = return_value8[0]) != return_value8[1]) {
  1311. printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
  1312. return_value8[0], return_value8[1], total, current->pattern, current->input);
  1313. is_successful = 0;
  1314. } else
  1315. #endif
  1316. #ifdef SUPPORT_PCRE16
  1317. if ((return_value = return_value16[0]) != return_value16[1]) {
  1318. printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
  1319. return_value16[0], return_value16[1], total, current->pattern, current->input);
  1320. is_successful = 0;
  1321. } else
  1322. #endif
  1323. #ifdef SUPPORT_PCRE32
  1324. if ((return_value = return_value32[0]) != return_value32[1]) {
  1325. printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
  1326. return_value32[0], return_value32[1], total, current->pattern, current->input);
  1327. is_successful = 0;
  1328. } else
  1329. #endif
  1330. #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
  1331. if (return_value8[0] != return_value16[0]) {
  1332. printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
  1333. return_value8[0], return_value16[0],
  1334. total, current->pattern, current->input);
  1335. is_successful = 0;
  1336. } else
  1337. #endif
  1338. #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
  1339. if (return_value8[0] != return_value32[0]) {
  1340. printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
  1341. return_value8[0], return_value32[0],
  1342. total, current->pattern, current->input);
  1343. is_successful = 0;
  1344. } else
  1345. #endif
  1346. #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
  1347. if (return_value16[0] != return_value32[0]) {
  1348. printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
  1349. return_value16[0], return_value32[0],
  1350. total, current->pattern, current->input);
  1351. is_successful = 0;
  1352. } else
  1353. #endif
  1354. if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
  1355. if (return_value == PCRE_ERROR_PARTIAL) {
  1356. return_value = 2;
  1357. } else {
  1358. return_value *= 2;
  1359. }
  1360. #ifdef SUPPORT_PCRE8
  1361. return_value8[0] = return_value;
  1362. #endif
  1363. #ifdef SUPPORT_PCRE16
  1364. return_value16[0] = return_value;
  1365. #endif
  1366. #ifdef SUPPORT_PCRE32
  1367. return_value32[0] = return_value;
  1368. #endif
  1369. /* Map the 16 and 32 bit ovector offsets back through the offset maps so they can be compared with the 8 bit offsets. */
  1370. if (current->flags & PCRE_UTF8) {
  1371. #ifdef SUPPORT_PCRE16
  1372. for (i = 0; i < return_value; ++i) {
  1373. if (ovector16_1[i] >= 0)
  1374. ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
  1375. if (ovector16_2[i] >= 0)
  1376. ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
  1377. }
  1378. #endif
  1379. #ifdef SUPPORT_PCRE32
  1380. for (i = 0; i < return_value; ++i) {
  1381. if (ovector32_1[i] >= 0)
  1382. ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
  1383. if (ovector32_2[i] >= 0)
  1384. ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
  1385. }
  1386. #endif
  1387. }
  1388. for (i = 0; i < return_value; ++i) {
  1389. #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
  1390. if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
  1391. printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
  1392. i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
  1393. total, current->pattern, current->input);
  1394. is_successful = 0;
  1395. }
  1396. #endif
  1397. #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
  1398. if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
  1399. printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
  1400. i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
  1401. total, current->pattern, current->input);
  1402. is_successful = 0;
  1403. }
  1404. #endif
  1405. #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
  1406. if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
  1407. printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
  1408. i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
  1409. total, current->pattern, current->input);
  1410. is_successful = 0;
  1411. }
  1412. #endif
  1413. }
  1414. }
  1415. } else
  1416. #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
  1417. {
  1418. /* Each enabled width (8, 16, 32 bit) is checked on its own: its JIT and interpreter results must be equal. */
  1419. #ifdef SUPPORT_PCRE8
  1420. if (return_value8[0] != return_value8[1]) {
  1421. printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
  1422. return_value8[0], return_value8[1], total, current->pattern, current->input);
  1423. is_successful = 0;
  1424. } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
  1425. if (return_value8[0] == PCRE_ERROR_PARTIAL)
  1426. return_value8[0] = 2;
  1427. else
  1428. return_value8[0] *= 2;
  1429. for (i = 0; i < return_value8[0]; ++i)
  1430. if (ovector8_1[i] != ovector8_2[i]) {
  1431. printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
  1432. i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
  1433. is_successful = 0;
  1434. }
  1435. }
  1436. #endif
  1437. #ifdef SUPPORT_PCRE16
  1438. if (return_value16[0] != return_value16[1]) {
  1439. printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
  1440. return_value16[0], return_value16[1], total, current->pattern, current->input);
  1441. is_successful = 0;
  1442. } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
  1443. if (return_value16[0] == PCRE_ERROR_PARTIAL)
  1444. return_value16[0] = 2;
  1445. else
  1446. return_value16[0] *= 2;
  1447. for (i = 0; i < return_value16[0]; ++i)
  1448. if (ovector16_1[i] != ovector16_2[i]) {
  1449. printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
  1450. i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
  1451. is_successful = 0;
  1452. }
  1453. }
  1454. #endif
  1455. #ifdef SUPPORT_PCRE32
  1456. if (return_value32[0] != return_value32[1]) {
  1457. printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
  1458. return_value32[0], return_value32[1], total, current->pattern, current->input);
  1459. is_successful = 0;
  1460. } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
  1461. if (return_value32[0] == PCRE_ERROR_PARTIAL)
  1462. return_value32[0] = 2;
  1463. else
  1464. return_value32[0] *= 2;
  1465. for (i = 0; i < return_value32[0]; ++i)
  1466. if (ovector32_1[i] != ovector32_2[i]) {
  1467. printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
  1468. i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
  1469. is_successful = 0;
  1470. }
  1471. }
  1472. #endif
  1473. }
  1474. }
  1475. if (is_successful) {
  1476. #ifdef SUPPORT_PCRE8
  1477. if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii)) {
  1478. if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
  1479. printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
  1480. total, current->pattern, current->input);
  1481. is_successful = 0;
  1482. }
  1483. if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
  1484. printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
  1485. total, current->pattern, current->input);
  1486. is_successful = 0;
  1487. }
  1488. }
  1489. #endif
  1490. #ifdef SUPPORT_PCRE16
  1491. if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii)) {
  1492. if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
  1493. printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
  1494. total, current->pattern, current->input);
  1495. is_successful = 0;
  1496. }
  1497. if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
  1498. printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
  1499. total, current->pattern, current->input);
  1500. is_successful = 0;
  1501. }
  1502. }
  1503. #endif
  1504. #ifdef SUPPORT_PCRE32
  1505. if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii)) {
  1506. if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
  1507. printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
  1508. total, current->pattern, current->input);
  1509. is_successful = 0;
  1510. }
  1511. if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
  1512. printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
  1513. total, current->pattern, current->input);
  1514. is_successful = 0;
  1515. }
  1516. }
  1517. #endif
  1518. }
  1519. if (is_successful) {
  1520. #ifdef SUPPORT_PCRE8
  1521. if (mark8_1 != mark8_2) {
  1522. printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
  1523. total, current->pattern, current->input);
  1524. is_successful = 0;
  1525. }
  1526. #endif
  1527. #ifdef SUPPORT_PCRE16
  1528. if (mark16_1 != mark16_2) {
  1529. printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
  1530. total, current->pattern, current->input);
  1531. is_successful = 0;
  1532. }
  1533. #endif
  1534. #ifdef SUPPORT_PCRE32
  1535. if (mark32_1 != mark32_2) {
  1536. printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
  1537. total, current->pattern, current->input);
  1538. is_successful = 0;
  1539. }
  1540. #endif
  1541. }
  1542. #ifdef SUPPORT_PCRE8
  1543. if (re8) {
  1544. pcre_free_study(extra8);
  1545. pcre_free(re8);
  1546. }
  1547. #endif
  1548. #ifdef SUPPORT_PCRE16
  1549. if (re16) {
  1550. pcre16_free_study(extra16);
  1551. pcre16_free(re16);
  1552. }
  1553. #endif
  1554. #ifdef SUPPORT_PCRE32
  1555. if (re32) {
  1556. pcre32_free_study(extra32);
  1557. pcre32_free(re32);
  1558. }
  1559. #endif
  1560. if (is_successful) {
  1561. successful++;
  1562. successful_row++;
  1563. printf(".");
  1564. if (successful_row >= 60) {
  1565. successful_row = 0;
  1566. printf("\n");
  1567. }
  1568. } else
  1569. successful_row = 0;
  1570. fflush(stdout);
  1571. current++;
  1572. }
  1573. tables(1);
  1574. #ifdef SUPPORT_PCRE8
  1575. setstack8(NULL);
  1576. #endif
  1577. #ifdef SUPPORT_PCRE16
  1578. setstack16(NULL);
  1579. #endif
  1580. #ifdef SUPPORT_PCRE32
  1581. setstack32(NULL);
  1582. #endif
  1583. if (total == successful) {
  1584. printf("\nAll JIT regression tests are successfully passed.\n");
  1585. return 0;
  1586. } else {
  1587. printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
  1588. return 1;
  1589. }
  1590. }
  1591. /* End of pcre_jit_test.c */