1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
12 /* Use the internal info for displaying the results of pcre_study(). */
16 /* It is possible to compile this test program without including support for
17 testing the POSIX interface, though this is not available via the standard
21 #include "pcreposix.h"
24 #ifndef CLOCKS_PER_SEC
26 #define CLOCKS_PER_SEC CLK_TCK
28 #define CLOCKS_PER_SEC 100
32 #define LOOPREPEAT 20000 /* iteration count of the compile, study and exec timing loops; also the divisor when reporting per-call time */
36 static int log_store = 0; /* reset for each pattern from the command-line default, or set to 1 by the M pattern modifier */
37 static size_t gotten_store; /* compared with the PCRE_INFO_SIZE result after compilation; presumably recorded by new_malloc() - confirm */
41 static int utf8_table1[] = { /* entry i: largest character value accepted for an encoding that uses i additional bytes */
42 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
44 static int utf8_table2[] = { /* entry i: marker bits ORed into the first byte written by ord2utf8() */
45 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
47 static int utf8_table3[] = { /* entry i: mask selecting the data bits carried by the first byte */
48 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
51 /*************************************************
52 * Convert character value to UTF-8 *
53 *************************************************/
55 /* This function takes an integer value in the range 0 - 0x7fffffff
56 and encodes it as a UTF-8 character in 0 to 6 bytes.
59 cvalue the character value
60 buffer pointer to buffer for result - at least 6 bytes long
62 Returns: number of characters placed in the buffer
63 -1 if input character is negative
64 0 if input character is positive but too big (only when
65 int is longer than 32 bits)
69 ord2utf8(int cvalue, unsigned char *buffer)
72 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
73 if (cvalue <= utf8_table1[i]) break;
74 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
75 if (cvalue < 0) return -1;
76 *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);
78 for (j = 0; j < i; j++)
80 *buffer++ = 0x80 | (cvalue & 0x3f);
87 /*************************************************
88 * Convert UTF-8 string to value *
89 *************************************************/
91 /* This function takes one or more bytes that represent a UTF-8 character,
92 and returns the value of the character.
95 buffer a pointer to the byte vector
96 vptr a pointer to an int to receive the value
98 Returns: > 0 => the number of bytes consumed
99 -6 to 0 => malformed UTF-8 character at offset = (-return)
103 utf82ord(unsigned char *buffer, int *vptr)
109 for (i = -1; i < 6; i++) /* i is number of additional bytes */
111 if ((d & 0x80) == 0) break;
115 if (i == -1) { *vptr = c; return 1; } /* ascii character */
116 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
118 /* i now has a value in the range 1-5 */
120 d = c & utf8_table3[i];
123 for (j = 0; j < i; j++)
126 if ((c & 0xc0) != 0x80) return -(j+1);
127 d |= (c & 0x3f) << s;
131 /* Check that encoding was the correct unique one */
133 for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
134 if (d <= utf8_table1[j]) break;
135 if (j != i) return -(i+1);
148 /* Debugging function to print the internal form of the regex. This is the same
149 code as contained in pcre.c under the DEBUG macro. */
151 static const char *OP_names[] = {
152 "End", "\\A", "\\B", "\\b", "\\D", "\\d",
153 "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
154 "Opt", "^", "$", "Any", "chars", "not",
155 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
156 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158 "*", "*?", "+", "+?", "?", "??", "{", "{",
159 "class", "Ref", "Recurse",
160 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
161 "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
162 "Brazero", "Braminzero", "Bra"
166 static void print_internals(pcre *re)
168 unsigned char *code = ((real_pcre *)re)->code;
170 fprintf(outfile, "------------------------------------------------------------------\n");
177 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
181 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
188 fprintf(outfile, " %s\n", OP_names[*code]);
189 fprintf(outfile, "------------------------------------------------------------------\n");
193 fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
198 fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
203 fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
208 charlength = *(++code);
209 fprintf(outfile, "%3d ", charlength);
210 while (charlength-- > 0)
211 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
212 else fprintf(outfile, "\\x%02x", c);
222 case OP_ASSERTBACK_NOT:
224 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
229 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
244 case OP_TYPEMINQUERY:
245 if (*code >= OP_TYPESTAR)
246 fprintf(outfile, " %s", OP_names[code[1]]);
247 else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
248 else fprintf(outfile, " \\x%02x", c);
249 fprintf(outfile, "%s", OP_names[*code++]);
255 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
256 else fprintf(outfile, " \\x%02x{", c);
257 if (*code != OP_EXACT) fprintf(outfile, ",");
258 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
259 if (*code == OP_MINUPTO) fprintf(outfile, "?");
266 fprintf(outfile, " %s{", OP_names[code[3]]);
267 if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
268 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
269 if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
274 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
275 else fprintf(outfile, " [^\\x%02x]", c);
284 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
285 else fprintf(outfile, " [^\\x%02x]", c);
286 fprintf(outfile, "%s", OP_names[*code++]);
292 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
293 else fprintf(outfile, " [^\\x%02x]{", c);
294 if (*code != OP_NOTEXACT) fprintf(outfile, ",");
295 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
296 if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
301 fprintf(outfile, " \\%d", *(++code));
303 goto CLASS_REF_REPEAT;
309 fprintf(outfile, " [");
311 for (i = 0; i < 256; i++)
313 if ((code[i/8] & (1 << (i&7))) != 0)
316 for (j = i+1; j < 256; j++)
317 if ((code[j/8] & (1 << (j&7))) == 0) break;
318 if (i == '-' || i == ']') fprintf(outfile, "\\");
319 if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
322 fprintf(outfile, "-");
323 if (j == '-' || j == ']') fprintf(outfile, "\\");
324 if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
329 fprintf(outfile, "]");
342 fprintf(outfile, "%s", OP_names[*code]);
347 min = (code[1] << 8) + code[2];
348 max = (code[3] << 8) + code[4];
349 if (max == 0) fprintf(outfile, "{%d,}", min);
350 else fprintf(outfile, "{%d,%d}", min, max);
351 if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
361 /* Anything else is just a one-node item */
364 fprintf(outfile, " %s", OP_names[*code]);
369 fprintf(outfile, "\n");
375 /* Character string printing function. A "normal" and a UTF-8 version. */
377 static void pchars(unsigned char *p, int length, int utf8)
384 int rc = utf82ord(p, &c);
389 if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
390 else fprintf(outfile, "\\x{%02x}", c);
395 /* Not UTF-8, or malformed UTF-8 */
397 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
398 else fprintf(outfile, "\\x%02x", c);
404 /* Alternative malloc function, to test functionality and show the size of the
407 static void *new_malloc(size_t size)
411 fprintf(outfile, "Memory allocation (code space): %d\n",
412 (int)((int)size - offsetof(real_pcre, code[0])));
419 /* Get one piece of information from the pcre_fullinfo() function */
421 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
424 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
425 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
431 /* Read lines from named file or stdin and write to named file or stdout; lines
432 consist of a regular expression, in delimiters and optionally followed by
433 options, followed by a set of test data, terminated by an empty line. */
435 int main(int argc, char **argv)
437 FILE *infile = stdin;
439 int study_options = 0;
447 unsigned char buffer[30000];
448 unsigned char dbuffer[1024];
450 /* Static so that new_malloc can use it. */
456 while (argc > 1 && argv[op][0] == '-')
458 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
460 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463 else if (strcmp(argv[op], "-p") == 0) posix = 1;
466 printf("*** Unknown option %s\n", argv[op]);
467 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
468 printf(" -d debug: show compiled code; implies -i\n"
469 " -i show information about compiled pattern\n"
470 " -p use POSIX interface\n"
471 " -s output store information\n"
472 " -t time compilation and execution\n");
479 /* Sort out the input and output files */
483 infile = fopen(argv[op], "r");
486 printf("** Failed to open %s\n", argv[op]);
493 outfile = fopen(argv[op+1], "w");
496 printf("** Failed to open %s\n", argv[op+1]);
501 /* Set alternative malloc function */
503 pcre_malloc = new_malloc;
505 /* Heading line, then prompt for first regex if stdin */
507 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
514 pcre_extra *extra = NULL;
516 #if !defined NOPOSIX /* There are still compilers that require no indent */
522 unsigned char *p, *pp, *ppp;
523 unsigned const char *tables = NULL;
525 int do_debug = debug;
528 int do_showinfo = showinfo;
531 int erroroffset, len, delimiter;
533 if (infile == stdin) printf(" re> ");
534 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
535 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
538 while (isspace(*p)) p++;
539 if (*p == 0) continue;
541 /* Get the delimiter and seek the end of the pattern; if it isn't
542 complete, read more. */
546 if (isalnum(delimiter) || delimiter == '\\')
548 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
558 if (*pp == '\\' && pp[1] != 0) pp++;
559 else if (*pp == delimiter) break;
564 len = sizeof(buffer) - (pp - buffer);
567 fprintf(outfile, "** Expression too long - missing delimiter?\n");
571 if (infile == stdin) printf(" > ");
572 if (fgets((char *)pp, len, infile) == NULL)
574 fprintf(outfile, "** Unexpected EOF\n");
578 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
581 /* If the first character after the delimiter is backslash, make
582 the pattern end with backslash. This is purely to provide a way
583 of testing for the error message when a pattern ends with backslash. */
585 if (pp[1] == '\\') *pp++ = '\\';
587 /* Terminate the pattern at the delimiter */
591 /* Look for options after final delimiter */
595 log_store = showstore; /* default from command line */
601 case 'g': do_g = 1; break;
602 case 'i': options |= PCRE_CASELESS; break;
603 case 'm': options |= PCRE_MULTILINE; break;
604 case 's': options |= PCRE_DOTALL; break;
605 case 'x': options |= PCRE_EXTENDED; break;
607 case '+': do_showrest = 1; break;
608 case 'A': options |= PCRE_ANCHORED; break;
609 case 'D': do_debug = do_showinfo = 1; break;
610 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611 case 'G': do_G = 1; break;
612 case 'I': do_showinfo = 1; break;
613 case 'M': log_store = 1; break;
616 case 'P': do_posix = 1; break;
619 case 'S': do_study = 1; break;
620 case 'U': options |= PCRE_UNGREEDY; break;
621 case 'X': options |= PCRE_EXTRA; break;
622 case '8': options |= PCRE_UTF8; utf8 = 1; break;
626 while (*ppp != '\n' && *ppp != ' ') ppp++;
628 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
630 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
633 tables = pcre_maketables();
637 case '\n': case ' ': break;
639 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
644 /* Handle compiling via the POSIX interface, which doesn't support the
645 timing, showing, or debugging options, nor the ability to pass over
646 local character tables. */
649 if (posix || do_posix)
653 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
654 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
655 rc = regcomp(&preg, (char *)p, cflags);
657 /* Compilation failed; go back for another re, skipping to blank line
658 if non-interactive. */
662 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
663 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
668 /* Handle compiling via the native interface */
671 #endif /* !defined NOPOSIX */
678 clock_t start_time = clock();
679 for (i = 0; i < LOOPREPEAT; i++)
681 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
682 if (re != NULL) free(re);
684 time_taken = clock() - start_time;
685 fprintf(outfile, "Compile time %.3f milliseconds\n",
686 ((double)time_taken * 1000.0) /
687 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
690 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
692 /* Compilation failed; go back for another re, skipping to blank line
693 if non-interactive. */
697 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
703 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
708 len = (int)strlen((char *)buffer);
709 while (len > 0 && isspace(buffer[len-1])) len--;
712 fprintf(outfile, "\n");
717 /* Compilation succeeded; print data if required. There are now two
718 info-returning functions. The old one has a limited interface and
719 returns only limited data. Check that it agrees with the newer one. */
723 int old_first_char, old_options, old_count;
724 int count, backrefmax, first_char, need_char;
727 if (do_debug) print_internals(re);
729 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
730 new_info(re, NULL, PCRE_INFO_SIZE, &size);
731 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
732 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
733 new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
734 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
736 old_count = pcre_info(re, &old_options, &old_first_char);
737 if (count < 0) fprintf(outfile,
738 "Error %d from pcre_info()\n", count);
741 if (old_count != count) fprintf(outfile,
742 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
745 if (old_first_char != first_char) fprintf(outfile,
746 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
747 first_char, old_first_char);
749 if (old_options != options) fprintf(outfile,
750 "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
754 if (size != gotten_store) fprintf(outfile,
755 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
758 fprintf(outfile, "Capturing subpattern count = %d\n", count);
760 fprintf(outfile, "Max back reference = %d\n", backrefmax);
761 if (options == 0) fprintf(outfile, "No options\n");
762 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
763 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
764 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
765 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
766 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
767 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
768 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
769 ((options & PCRE_EXTRA) != 0)? " extra" : "",
770 ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
771 ((options & PCRE_UTF8) != 0)? " utf8" : "");
773 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
774 fprintf(outfile, "Case state changes\n");
776 if (first_char == -1)
778 fprintf(outfile, "First char at start or follows \\n\n");
780 else if (first_char < 0)
782 fprintf(outfile, "No first char\n");
786 if (isprint(first_char))
787 fprintf(outfile, "First char = \'%c\'\n", first_char);
789 fprintf(outfile, "First char = %d\n", first_char);
794 fprintf(outfile, "No need char\n");
798 if (isprint(need_char))
799 fprintf(outfile, "Need char = \'%c\'\n", need_char);
801 fprintf(outfile, "Need char = %d\n", need_char);
805 /* If /S was present, study the regexp to generate additional info to
806 help with the matching. */
814 clock_t start_time = clock();
815 for (i = 0; i < LOOPREPEAT; i++)
816 extra = pcre_study(re, study_options, &error);
817 time_taken = clock() - start_time;
818 if (extra != NULL) free(extra);
819 fprintf(outfile, " Study time %.3f milliseconds\n",
820 ((double)time_taken * 1000.0)/
821 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
824 extra = pcre_study(re, study_options, &error);
826 fprintf(outfile, "Failed to study: %s\n", error);
827 else if (extra == NULL)
828 fprintf(outfile, "Study returned NULL\n");
830 else if (do_showinfo)
832 uschar *start_bits = NULL;
833 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
834 if (start_bits == NULL)
835 fprintf(outfile, "No starting character set\n");
840 fprintf(outfile, "Starting character set: ");
841 for (i = 0; i < 256; i++)
843 if ((start_bits[i/8] & (1<<(i%8))) != 0)
847 fprintf(outfile, "\n ");
850 if (isprint(i) && i != ' ')
852 fprintf(outfile, "%c ", i);
857 fprintf(outfile, "\\x%02x ", i);
862 fprintf(outfile, "\n");
868 /* Read data lines and test them */
873 unsigned char *bptr = dbuffer;
879 int start_offset = 0;
882 int size_offsets = sizeof(offsets)/sizeof(int);
886 if (infile == stdin) printf("data> ");
887 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
892 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
894 len = (int)strlen((char *)buffer);
895 while (len > 0 && isspace(buffer[len-1])) len--;
900 while (isspace(*p)) p++;
903 while ((c = *p++) != 0)
907 if (c == '\\') switch ((c = *p++))
909 case 'a': c = 7; break;
910 case 'b': c = '\b'; break;
911 case 'e': c = 27; break;
912 case 'f': c = '\f'; break;
913 case 'n': c = '\n'; break;
914 case 'r': c = '\r'; break;
915 case 't': c = '\t'; break;
916 case 'v': c = '\v'; break;
918 case '0': case '1': case '2': case '3':
919 case '4': case '5': case '6': case '7':
921 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
922 c = c * 8 + *p++ - '0';
927 /* Handle \x{..} specially - new Perl thing for utf8 */
931 unsigned char *pt = p;
933 while (isxdigit(*(++pt)))
934 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
937 unsigned char buffer[8];
939 utn = ord2utf8(c, buffer);
940 for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
941 c = buffer[ii]; /* Last byte */
945 /* Not correct form; fall through */
951 while (i++ < 2 && isxdigit(*p))
953 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
958 case 0: /* Allows for an empty line */
962 case 'A': /* Option setting */
963 options |= PCRE_ANCHORED;
967 options |= PCRE_NOTBOL;
971 while(isdigit(*p)) n = n * 10 + *p++ - '0';
972 copystrings |= 1 << n;
976 while(isdigit(*p)) n = n * 10 + *p++ - '0';
977 getstrings |= 1 << n;
985 options |= PCRE_NOTEMPTY;
989 while(isdigit(*p)) n = n * 10 + *p++ - '0';
990 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
994 options |= PCRE_NOTEOL;
1002 /* Handle matching via the POSIX interface, which does not
1005 #if !defined NOPOSIX
1006 if (posix || do_posix)
1010 regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
1011 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1012 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1014 rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
1018 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1019 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1024 for (i = 0; i < size_offsets; i++)
1026 if (pmatch[i].rm_so >= 0)
1028 fprintf(outfile, "%2d: ", (int)i);
1029 pchars(dbuffer + pmatch[i].rm_so,
1030 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1031 fprintf(outfile, "\n");
1032 if (i == 0 && do_showrest)
1034 fprintf(outfile, " 0+ ");
1035 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1036 fprintf(outfile, "\n");
1043 /* Handle matching via the native interface - repeats for /g and /G */
1046 #endif /* !defined NOPOSIX */
1048 for (;; gmatched++) /* Loop for /g or /G */
1054 clock_t start_time = clock();
1055 for (i = 0; i < LOOPREPEAT; i++)
1056 count = pcre_exec(re, extra, (char *)bptr, len,
1057 start_offset, options | g_notempty, offsets, size_offsets);
1058 time_taken = clock() - start_time;
1059 fprintf(outfile, "Execute time %.3f milliseconds\n",
1060 ((double)time_taken * 1000.0)/
1061 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1064 count = pcre_exec(re, extra, (char *)bptr, len,
1065 start_offset, options | g_notempty, offsets, size_offsets);
1069 fprintf(outfile, "Matched, but too many substrings\n");
1070 count = size_offsets/3;
1078 for (i = 0; i < count * 2; i += 2)
1081 fprintf(outfile, "%2d: <unset>\n", i/2);
1084 fprintf(outfile, "%2d: ", i/2);
1085 pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1086 fprintf(outfile, "\n");
1091 fprintf(outfile, " 0+ ");
1092 pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1093 fprintf(outfile, "\n");
1099 for (i = 0; i < 32; i++)
1101 if ((copystrings & (1 << i)) != 0)
1103 char copybuffer[16];
1104 int rc = pcre_copy_substring((char *)bptr, offsets, count,
1105 i, copybuffer, sizeof(copybuffer));
1107 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1109 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1113 for (i = 0; i < 32; i++)
1115 if ((getstrings & (1 << i)) != 0)
1117 const char *substring;
1118 int rc = pcre_get_substring((char *)bptr, offsets, count,
1121 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1124 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1125 /* free((void *)substring); */
1126 pcre_free_substring(substring);
1133 const char **stringlist;
1134 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1137 fprintf(outfile, "get substring list failed %d\n", rc);
1140 for (i = 0; i < count; i++)
1141 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1142 if (stringlist[i] != NULL)
1143 fprintf(outfile, "string list not terminated by NULL\n");
1144 /* free((void *)stringlist); */
1145 pcre_free_substring_list(stringlist);
1150 /* Failed to match. If this is a /g or /G loop and we previously set
1151 g_notempty after a null match, this is not necessarily the end.
1152 We want to advance the start offset, and continue. Fudge the offset
1153 values to achieve this. We won't be at the end of the string - that
1154 was checked before setting g_notempty. */
1158 if (g_notempty != 0)
1160 offsets[0] = start_offset;
1161 offsets[1] = start_offset + 1;
1165 if (gmatched == 0) /* Error if no previous matches */
1167 if (count == -1) fprintf(outfile, "No match\n");
1168 else fprintf(outfile, "Error %d\n", count);
1170 break; /* Out of the /g loop */
1174 /* If not /g or /G we are done */
1176 if (!do_g && !do_G) break;
1178 /* If we have matched an empty string, first check to see if we are at
1179 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1180 what Perl's /g option does. This turns out to be rather cunning. First
1181 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1182 same point. If this fails (picked up above) we advance to the next
1186 if (offsets[0] == offsets[1])
1188 if (offsets[0] == len) break;
1189 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1192 /* For /g, update the start offset, leaving the rest alone */
1194 if (do_g) start_offset = offsets[1];
1196 /* For /G, update the pointer and length */
1203 } /* End of loop for /g and /G */
1204 } /* End of loop for data lines */
1208 #if !defined NOPOSIX
1209 if (posix || do_posix) regfree(&preg);
1212 if (re != NULL) free(re);
1213 if (extra != NULL) free(extra);
1216 free((void *)tables);
1217 setlocale(LC_CTYPE, "C");
1221 fprintf(outfile, "\n");