ongrep

A cleaned up fork of ngrep for OpenBSD
git clone git://git.sgregoratto.me/ongrep
Log | Files | Refs | README | LICENSE

commit cbd61bb3e4476c8e254917f1f42620adb3d693dd
parent e4d274a83baef10226c4fe93384fb3c4bbebcaa6
Author: Jordan Ritter <jpr5@darkridge.com>
Date:   Tue, 22 Feb 2005 04:48:31 +0000

zero out changelog (regex is now maintained through glibc so there
won't be any more updates to this particular one), and nuke test and doc
data as they're not pertinent to the ngrep source tree (I'll put in a
link instead)

Diffstat:
Mregex-0.12/ChangeLog | 3036-------------------------------------------------------------------------------
Dregex-0.12/doc/regex.aux | 136-------------------------------------------------------------------------------
Dregex-0.12/doc/regex.cps | 152-------------------------------------------------------------------------------
Dregex-0.12/doc/regex.info | 2836-------------------------------------------------------------------------------
Dregex-0.12/doc/regex.texi | 3138-------------------------------------------------------------------------------
Dregex-0.12/test/ChangeLog | 77-----------------------------------------------------------------------------
Dregex-0.12/test/TAGS | 373-------------------------------------------------------------------------------
Dregex-0.12/test/alloca.c | 194-------------------------------------------------------------------------------
Dregex-0.12/test/bsd-interf.c | 38--------------------------------------
Dregex-0.12/test/debugmalloc.c | 273-------------------------------------------------------------------------------
Dregex-0.12/test/emacsmalloc.c | 844-------------------------------------------------------------------------------
Dregex-0.12/test/fileregex.c | 77-----------------------------------------------------------------------------
Dregex-0.12/test/g++malloc.c | 1288-------------------------------------------------------------------------------
Dregex-0.12/test/getpagesize.h | 25-------------------------
Dregex-0.12/test/iregex.c | 164-------------------------------------------------------------------------------
Dregex-0.12/test/main.c | 49-------------------------------------------------
Dregex-0.12/test/malloc-test.c | 47-----------------------------------------------
Dregex-0.12/test/other.c | 503-------------------------------------------------------------------------------
Dregex-0.12/test/printchar.c | 14--------------
Dregex-0.12/test/psx-basic.c | 253-------------------------------------------------------------------------------
Dregex-0.12/test/psx-extend.c | 1244-------------------------------------------------------------------------------
Dregex-0.12/test/psx-generic.c | 336-------------------------------------------------------------------------------
Dregex-0.12/test/psx-group.c | 440-------------------------------------------------------------------------------
Dregex-0.12/test/psx-interf.c | 624-------------------------------------------------------------------------------
Dregex-0.12/test/psx-interv.c | 140-------------------------------------------------------------------------------
Dregex-0.12/test/regexcpp.sed | 8--------
Dregex-0.12/test/syntax.skel | 74--------------------------------------------------------------------------
Dregex-0.12/test/test.c | 782-------------------------------------------------------------------------------
Dregex-0.12/test/test.h | 141-------------------------------------------------------------------------------
Dregex-0.12/test/tregress.c | 464-------------------------------------------------------------------------------
Dregex-0.12/test/upcase.c | 39---------------------------------------
Dregex-0.12/test/xmalloc.c | 21---------------------
32 files changed, 0 insertions(+), 17830 deletions(-)

diff --git a/regex-0.12/ChangeLog b/regex-0.12/ChangeLog @@ -1,3036 +0,0 @@ -Thu Jan 3 08:52:27 PST 2002 Jordan Ritter (jpr5@darkridge.com) - - * Modified configure.in/configure to displace the standard - regex symbols to other names (since ngrep doesn't use them, - and they collide on MacOS X's installation). - -Fri Apr 2 17:31:59 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * Released version 0.12. - - * regex.c (regerror): If errcode is zero, that's not a valid - error code, according to POSIX, but return "Success." - - * regex.c (regerror): Remember to actually fetch the message - from re_error_msg. - - * regex.c (regex_compile): Don't use the trick for ".*\n" on - ".+\n". Since the latter involves laying an extra choice - point, the backward jump isn't adjusted properly. - -Thu Mar 25 21:35:18 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c (regex_compile): In the handle_open and handle_close - sections, clear pending_exact to zero. - -Tue Mar 9 12:03:07 1993 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu) - - * regex.c (re_search_2): In the loop which searches forward - using fastmap, don't forget to cast the character from the - string to an unsigned before using it as an index into the - translate map. - -Thu Jan 14 15:41:46 1993 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu) - - * regex.h: Never define const; let the callers do it. - configure.in: Don't define USING_AUTOCONF. - -Wed Jan 6 20:49:29 1993 Jim Blandy (jimb@geech.gnu.ai.mit.edu) - - * regex.c (regerror): Abort if ERRCODE is out of range. - -Sun Dec 20 16:19:10 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * configure.in: Arrange to #define USING_AUTOCONF. - * regex.h: If USING_AUTOCONF is #defined, don't mess with - `const' at all; autoconf has taken care of it. - -Mon Dec 14 21:40:39 1992 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu) - - * regex.h (RE_SYNTAX_AWK): Fix typo. From Arnold Robbins. 
- -Sun Dec 13 20:35:39 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c (compile_range): Fetch the range start and end by - casting the pattern pointer to an `unsigned char *' before - fetching through it. - -Sat Dec 12 09:41:01 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c: Undo change of 12/7/92; it's better for Emacs to - #define HAVE_CONFIG_H. - -Fri Dec 11 22:00:34 1992 Jim Meyering (meyering@hal.gnu.ai.mit.edu) - - * regex.c: Define and use isascii-protected ctype.h macros. - -Fri Dec 11 05:10:38 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c (re_match_2): Undo Karl's November 10th change; it - keeps the group in :\(.*\) from matching :/ properly. - -Mon Dec 7 19:44:56 1992 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu) - - * regex.c: #include config.h if either HAVE_CONFIG_H or emacs - is #defined. - -Tue Dec 1 13:33:17 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) - - * regex.c [HAVE_CONFIG_H]: Include config.h. - -Wed Nov 25 23:46:02 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) - - * regex.c (regcomp): Add parens around bitwise & for clarity. - Initialize preg->allocated to prevent segv. - -Tue Nov 24 09:22:29 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) - - * regex.c: Use HAVE_STRING_H, not USG. - * configure.in: Check for string.h, not USG. - -Fri Nov 20 06:33:24 1992 Karl Berry (karl@cs.umb.edu) - - * regex.c (SIGN_EXTEND_CHAR) [VMS]: Back out of this change, - since Roland Roberts now says it was a localism. - -Mon Nov 16 07:01:36 1992 Karl Berry (karl@cs.umb.edu) - - * regex.h (const) [!HAVE_CONST]: Test another cpp symbol (from - Autoconf) before zapping const. - -Sun Nov 15 05:36:42 1992 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu) - - * regex.c, regex.h: Changes for VMS from Roland B Roberts - <roberts@nsrl31.nsrl.rochester.edu>. - -Thu Nov 12 11:31:15 1992 Karl Berry (karl@cs.umb.edu) - - * Makefile.in (distfiles): Include INSTALL. 
- -Tue Nov 10 09:29:23 1992 Karl Berry (karl@cs.umb.edu) - - * regex.c (re_match_2): At maybe_pop_jump, if at end of string - and pattern, just quit the matching loop. - - * regex.c (LETTER_P): Rename to `WORDCHAR_P'. - - * regex.c (AT_STRINGS_{BEG,END}): Take `d' as an arg; change - callers. - - * regex.c (re_match_2) [!emacs]: In wordchar and notwordchar - cases, advance d. - -Wed Nov 4 15:43:58 1992 Karl Berry (karl@hal.gnu.ai.mit.edu) - - * regex.h (const) [!__STDC__]: Don't define if it's already defined. - -Sat Oct 17 19:28:19 1992 Karl Berry (karl@cs.umb.edu) - - * regex.c (bcmp, bcopy, bzero): Only #define if they are not - already #defined. - - * configure.in: Use AC_CONST. - -Thu Oct 15 08:39:06 1992 Karl Berry (karl@cs.umb.edu) - - * regex.h (const) [!const]: Conditionalize. - -Fri Oct 2 13:31:42 1992 Karl Berry (karl@cs.umb.edu) - - * regex.h (RE_SYNTAX_ED): New definition. - -Sun Sep 20 12:53:39 1992 Karl Berry (karl@cs.umb.edu) - - * regex.[ch]: remove traces of `longest_p' -- dumb idea to put - this into the pattern buffer, as it means parallelism loses. - - * Makefile.in (config.status): use sh to run configure --no-create. - - * Makefile.in (realclean): OK, don't remove configure. - -Sat Sep 19 09:05:08 1992 Karl Berry (karl@hayley) - - * regex.c (PUSH_FAILURE_POINT, POP_FAILURE_POINT) [DEBUG]: keep - track of how many failure points we push and pop. - (re_match_2) [DEBUG]: declare variables for that, and print results. - (DEBUG_PRINT4): new macro. - - * regex.h (re_pattern_buffer): new field `longest_p' (to - eliminate backtracking if the user doesn't need it). - * regex.c (re_compile_pattern): initialize it (to 1). - (re_search_2): set it to zero if register information is not needed. - (re_match_2): if it's set, don't backtrack. - - * regex.c (re_search_2): update fastmap only after checking that - the pattern is anchored. - - * regex.c (re_match_2): do more debugging at maybe_pop_jump. 
- - * regex.c (re_search_2): cast result of TRANSLATE for use in - array subscript. - -Thu Sep 17 19:47:16 1992 Karl Berry (karl@geech.gnu.ai.mit.edu) - - * Version 0.11. - -Wed Sep 16 08:17:10 1992 Karl Berry (karl@hayley) - - * regex.c (INIT_FAIL_STACK): rewrite as statements instead of a - complicated comma expr, to avoid compiler warnings (and also - simplify). - (re_compile_fastmap, re_match_2): change callers. - - * regex.c (POP_FAILURE_POINT): cast pop of regstart and regend - to avoid compiler warnings. - - * regex.h (RE_NEWLINE_ORDINARY): remove this syntax bit, and - remove uses. - * regex.c (at_{beg,end}line_loc_p): go the last mile: remove - the RE_NEWLINE_ORDINARY case which made the ^ in \n^ be an anchor. - -Tue Sep 15 09:55:29 1992 Karl Berry (karl@hayley) - - * regex.c (at_begline_loc_p): new fn. - (at_endline_loc_p): simplify at_endline_op_p. - (regex_compile): in ^/$ cases, call the above. - - * regex.c (POP_FAILURE_POINT): rewrite the fn as a macro again, - as lord's profiling indicates the function is 20% of the time. - (re_match_2): callers changed. - - * configure.in (AC_MEMORY_H): remove, since we never use memcpy et al. - -Mon Sep 14 17:49:27 1992 Karl Berry (karl@hayley) - - * Makefile.in (makeargs): include MFLAGS. - -Sun Sep 13 07:41:45 1992 Karl Berry (karl@hayley) - - * regex.c (regex_compile): in \1..\9 case, make it always - invalid to use \<digit> if there is no preceding <digit>th subexpr. - * regex.h (RE_NO_MISSING_BK_REF): remove this syntax bit. - - * regex.c (regex_compile): remove support for invalid empty groups. - * regex.h (RE_NO_EMPTY_GROUPS): remove this syntax bit. - - * regex.c (FREE_VARIABLES) [!REGEX_MALLOC]: define as alloca (0), - to reclaim memory. - - * regex.h (RE_SYNTAX_POSIX_SED): don't bother with this. - -Sat Sep 12 13:37:21 1992 Karl Berry (karl@hayley) - - * README: incorporate emacs.diff. - - * regex.h (_RE_ARGS) [!__STDC__]: define as empty parens. - - * configure.in: add AC_ALLOCA. 
- - * Put test files in subdir test, documentation in subdir doc. - Adjust Makefile.in and configure.in accordingly. - -Thu Sep 10 10:29:11 1992 Karl Berry (karl@hayley) - - * regex.h (RE_SYNTAX_{POSIX_,}SED): new definitions. - -Wed Sep 9 06:27:09 1992 Karl Berry (karl@hayley) - - * Version 0.10. - -Tue Sep 8 07:32:30 1992 Karl Berry (karl@hayley) - - * xregex.texinfo: put the day of month into the date. - - * Makefile.in (realclean): remove Texinfo-generated files. - (distclean): remove empty sorted index files. - (clean): remove dvi files, etc. - - * configure.in: test for more Unix variants. - - * fileregex.c: new file. - Makefile.in (fileregex): new target. - - * iregex.c (main): move variable decls to smallest scope. - - * regex.c (FREE_VARIABLES): free reg_{,info_}dummy. - (re_match_2): check that the allocation for those two succeeded. - - * regex.c (FREE_VAR): replace FREE_NONNULL with this. - (FREE_VARIABLES): call it. - (re_match_2) [REGEX_MALLOC]: initialize all our vars to NULL. - - * tregress.c (do_match): generalize simple_match. - (SIMPLE_NONMATCH): new macro. - (SIMPLE_MATCH): change from routine. - - * Makefile.in (regex.texinfo): make file readonly, so we don't - edit it by mistake. - - * many files (re_default_syntax): rename to `re_syntax_options'; - call re_set_syntax instead of assigning to the variable where - possible. - -Mon Sep 7 10:12:16 1992 Karl Berry (karl@hayley) - - * syntax.skel: don't use prototypes. - - * {configure,Makefile}.in: new files. - - * regex.c: include <string.h> `#if USG || STDC_HEADERS'; remove - obsolete test for `POSIX', and test for BSRTING. - Include <strings.h> if we are not USG or STDC_HEADERS. - Do not include <unistd.h>. What did we ever need that for? - - * regex.h (RE_NO_EMPTY_ALTS): remove this. - (RE_SYNTAX_AWK): remove from here, too. - * regex.c (regex_compile): remove the check. - * xregex.texinfo (Alternation Operator): update. - * other.c (test_others): remove tests for this. 
- - * regex.h (RE_DUP_MAX): undefine if already defined. - - * regex.h: (RE_SYNTAX_POSIX*): redo to allow more operators, and - define new syntaxes with the minimal set. - - * syntax.skel (main): used sscanf instead of scanf. - - * regex.h (RE_SYNTAX_*GREP): new definitions from mike. - - * regex.c (regex_compile): initialize the upper bound of - intervals at the beginning of the interval, not the end. - (From pclink@qld.tne.oz.au.) - - * regex.c (handle_bar): rename to `handle_alt', for consistency. - - * regex.c ({store,insert}_{op1,op2}): new routines (except the last). - ({STORE,INSERT}_JUMP{,2}): macros to replace the old routines, - which took arguments in different orders, and were generally weird. - - * regex.c (PAT_PUSH*): rename to `BUF_PUSH*' -- we're not - appending info to the pattern! - -Sun Sep 6 11:26:49 1992 Karl Berry (karl@hayley) - - * regex.c (regex_compile): delete the variable - `following_left_brace', since we never use it. - - * regex.c (print_compiled_pattern): don't print the fastmap if - it's null. - - * regex.c (re_compile_fastmap): handle - `on_failure_keep_string_jump' like `on_failure_jump'. - - * regex.c (re_match_2): in `charset{,_not' case, cast the bit - count to unsigned, not unsigned char, in case we have a full - 32-byte bit list. - - * tregress.c (simple_match): remove. - (simple_test): rename as `simple_match'. - (simple_compile): print the error string if the compile failed. - - * regex.c (DO_RANGE): rewrite as a function, `compile_range', so - we can debug it. Change pattern characters to unsigned char - *'s, and change the range variable to an unsigned. - (regex_compile): change calls. - -Sat Sep 5 17:40:49 1992 Karl Berry (karl@hayley) - - * regex.h (_RE_ARGS): new macro to put in argument lists (if - ANSI) or omit them (if K&R); don't declare routines twice. - - * many files (obscure_syntax): rename to `re_default_syntax'. - -Fri Sep 4 09:06:53 1992 Karl Berry (karl@hayley) - - * GNUmakefile (extraclean): new target. 
- (realclean): delete the info files. - -Wed Sep 2 08:14:42 1992 Karl Berry (karl@hayley) - - * regex.h: doc fix. - -Sun Aug 23 06:53:15 1992 Karl Berry (karl@hayley) - - * regex.[ch] (re_comp): no const in the return type (from djm). - -Fri Aug 14 07:25:46 1992 Karl Berry (karl@hayley) - - * regex.c (DO_RANGE): declare variables as unsigned chars, not - signed chars (from jimb). - -Wed Jul 29 18:33:53 1992 Karl Berry (karl@claude.cs.umb.edu) - - * Version 0.9. - - * GNUmakefile (distclean): do not remove regex.texinfo. - (realclean): remove it here. - - * tregress.c (simple_test): initialize buf.buffer. - -Sun Jul 26 08:59:38 1992 Karl Berry (karl@hayley) - - * regex.c (push_dummy_failure): new opcode and corresponding - case in the various routines. Pushed at the end of - alternatives. - - * regex.c (jump_past_next_alt): rename to `jump_past_alt', for - brevity. - (no_pop_jump): rename to `jump'. - - * regex.c (regex_compile) [DEBUG]: terminate printing of pattern - with a newline. - - * NEWS: new file. - - * tregress.c (simple_{compile,match,test}): routines to simplify all - these little tests. - - * tregress.c: test for matching as much as possible. - -Fri Jul 10 06:53:32 1992 Karl Berry (karl@hayley) - - * Version 0.8. - -Wed Jul 8 06:39:31 1992 Karl Berry (karl@hayley) - - * regex.c (SIGN_EXTEND_CHAR): #undef any previous definition, as - ours should always work properly. - -Mon Jul 6 07:10:50 1992 Karl Berry (karl@hayley) - - * iregex.c (main) [DEBUG]: conditionalize the call to - print_compiled_pattern. - - * iregex.c (main): initialize buf.buffer to NULL. - * tregress (test_regress): likewise. - - * regex.c (alloca) [sparc]: #if on HAVE_ALLOCA_H instead. - - * tregress.c (test_regress): didn't have jla's test quite right. - -Sat Jul 4 09:02:12 1992 Karl Berry (karl@hayley) - - * regex.c (re_match_2): only REGEX_ALLOCATE all the register - vectors if the pattern actually has registers. - (match_end): new variable to avoid having to use best_regend[0]. 
- - * regex.c (IS_IN_FIRST_STRING): rename to FIRST_STRING_P. - - * regex.c: doc fixes. - - * tregess.c (test_regress): new fastmap test forwarded by rms. - - * tregress.c (test_regress): initialize the fastmap field. - - * tregress.c (test_regress): new test from jla that aborted - in re_search_2. - -Fri Jul 3 09:10:05 1992 Karl Berry (karl@hayley) - - * tregress.c (test_regress): add tests for translating charsets, - from kaoru. - - * GNUmakefile (common): add alloca.o. - * alloca.c: new file, copied from bison. - - * other.c (test_others): remove var `buf', since it's no longer used. - - * Below changes from ro@TechFak.Uni-Bielefeld.DE. - - * tregress.c (test_regress): initialize buf.allocated. - - * regex.c (re_compile_fastmap): initialize `succeed_n_p'. - - * GNUmakefile (regex): depend on $(common). - -Wed Jul 1 07:12:46 1992 Karl Berry (karl@hayley) - - * Version 0.7. - - * regex.c: doc fixes. - -Mon Jun 29 08:09:47 1992 Karl Berry (karl@fosse) - - * regex.c (pop_failure_point): change string vars to - `const char *' from `unsigned char *'. - - * regex.c: consolidate debugging stuff. - (print_partial_compiled_pattern): avoid enum clash. - -Mon Jun 29 07:50:27 1992 Karl Berry (karl@hayley) - - * xmalloc.c: new file. - * GNUmakefile (common): add it. - - * iregex.c (print_regs): new routine (from jimb). - (main): call it. - -Sat Jun 27 10:50:59 1992 Jim Blandy (jimb@pogo.cs.oberlin.edu) - - * xregex.c (re_match_2): When we have accepted a match and - restored d from best_regend[0], we need to set dend - appropriately as well. - -Sun Jun 28 08:48:41 1992 Karl Berry (karl@hayley) - - * tregress.c: rename from regress.c. - - * regex.c (print_compiled_pattern): improve charset case to ease - byte-counting. - Also, don't distinguish between Emacs and non-Emacs - {not,}wordchar opcodes. - - * regex.c (print_fastmap): move here. - * test.c: from here. - * regex.c (print_{{partial,}compiled_pattern,double_string}): - rename from ..._printer. 
Change calls here and in test.c. - - * regex.c: create from xregex.c and regexinc.c for once and for - all, and change the debug fns to be extern, instead of static. - * GNUmakefile: remove traces of xregex.c. - * test.c: put in externs, instead of including regexinc.c. - - * xregex.c: move interactive main program and scanstring to iregex.c. - * iregex.c: new file. - * upcase.c, printchar.c: new files. - - * various doc fixes and other cosmetic changes throughout. - - * regexinc.c (compiled_pattern_printer): change variable name, - for consistency. - (partial_compiled_pattern_printer): print other info about the - compiled pattern, besides just the opcodes. - * xregex.c (regex_compile) [DEBUG]: print the compiled pattern - when we're done. - - * xregex.c (re_compile_fastmap): in the duplicate case, set - `can_be_null' and return. - Also, set `bufp->can_be_null' according to a new variable, - `path_can_be_null'. - Also, rewrite main while loop to not test `p != NULL', since - we never set it that way. - Also, eliminate special `can_be_null' value for the endline case. - (re_search_2): don't test for the special value. - * regex.h (struct re_pattern_buffer): remove the definition. - -Sat Jun 27 15:00:40 1992 Karl Berry (karl@hayley) - - * xregex.c (re_compile_fastmap): remove the `RE_' from - `REG_RE_MATCH_NULL_AT_END'. - Also, assert the fastmap in the pattern buffer is non-null. - Also, reset `succeed_n_p' after we've - paid attention to it, instead of every time through the loop. - Also, in the `anychar' case, only clear fastmap['\n'] if the - syntax says to, and don't return prematurely. - Also, rearrange cases in some semblance of a rational order. - * regex.h (REG_RE_MATCH_NULL_AT_END): remove the `RE_' from the name. - - * other.c: take bug reports from here. - * regress.c: new file for them. - * GNUmakefile (test): add it. - * main.c (main): new possible test. - * test.h (test_type): new value in enum. 
- -Thu Jun 25 17:37:43 1992 Karl Berry (karl@hayley) - - * xregex.c (scanstring) [test]: new function from jimb to allow some - escapes. - (main) [test]: call it (on the string, not the pattern). - - * xregex.c (main): make return type `int'. - -Wed Jun 24 10:43:03 1992 Karl Berry (karl@hayley) - - * xregex.c (pattern_offset_t): change to `int', for the benefit - of patterns which compile to more than 2^15 bytes. - - * xregex.c (GET_BUFFER_SPACE): remove spurious braces. - - * xregex.texinfo (Using Registers): put in a stub to ``document'' - the new function. - * regex.h (re_set_registers) [!__STDC__]: declare. - * xregex.c (re_set_registers): declare K&R style (also move to a - different place in the file). - -Mon Jun 8 18:03:28 1992 Jim Blandy (jimb@pogo.cs.oberlin.edu) - - * regex.h (RE_NREGS): Doc fix. - - * xregex.c (re_set_registers): New function. - * regex.h (re_set_registers): Declaration for new function. - -Fri Jun 5 06:55:18 1992 Karl Berry (karl@hayley) - - * main.c (main): `return 0' instead of `exit (0)'. (From Paul Eggert) - - * regexinc.c (SIGN_EXTEND_CHAR): cast to unsigned char. - (extract_number, EXTRACT_NUMBER): don't bother to cast here. - -Tue Jun 2 07:37:53 1992 Karl Berry (karl@hayley) - - * Version 0.6. - - * Change copyrights to `1985, 89, ...'. - - * regex.h (REG_RE_MATCH_NULL_AT_END): new macro. - * xregex.c (re_compile_fastmap): initialize `can_be_null' to - `p==pend', instead of in the test at the top of the loop (as - it was, it was always being set). - Also, set `can_be_null'=1 if we would jump to the end of the - pattern in the `on_failure_jump' cases. - (re_search_2): check if `can_be_null' is 1, not nonzero. This - was the original test in rms' regex; why did we change this? - - * xregex.c (re_compile_fastmap): rename `is_a_succeed_n' to - `succeed_n_p'. 
- -Sat May 30 08:09:08 1992 Karl Berry (karl@hayley) - - * xregex.c (re_compile_pattern): declare `regnum' as `unsigned', - not `regnum_t', for the benefit of those patterns with more - than 255 groups. - - * xregex.c: rename `failure_stack' to `fail_stack', for brevity; - likewise for `match_nothing' to `match_null'. - - * regexinc.c (REGEX_REALLOCATE): take both the new and old - sizes, and copy only the old bytes. - * xregex.c (DOUBLE_FAILURE_STACK): pass both old and new. - * This change from Thorsten Ohl. - -Fri May 29 11:45:22 1992 Karl Berry (karl@hayley) - - * regexinc.c (SIGN_EXTEND_CHAR): define as `(signed char) c' - instead of relying on __CHAR_UNSIGNED__, to work with - compilers other than GCC. From Per Bothner. - - * main.c (main): change return type to `int'. - -Mon May 18 06:37:08 1992 Karl Berry (karl@hayley) - - * regex.h (RE_SYNTAX_AWK): typo in RE_RE_UNMATCHED... - -Fri May 15 10:44:46 1992 Karl Berry (karl@hayley) - - * Version 0.5. - -Sun May 3 13:54:00 1992 Karl Berry (karl@hayley) - - * regex.h (struct re_pattern_buffer): now it's just `regs_allocated'. - (REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED): new constants. - * xregex.c (regexec, re_compile_pattern): set the field appropriately. - (re_match_2): and use it. bufp can't be const any more. - -Fri May 1 15:43:09 1992 Karl Berry (karl@hayley) - - * regexinc.c: unconditionally include <sys/types.h>, first. - - * regex.h (struct re_pattern_buffer): rename - `caller_allocated_regs' to `regs_allocated_p'. - * xregex.c (re_compile_pattern): same change here. - (regexec): and here. - (re_match_2): reallocate registers if necessary. - -Fri Apr 10 07:46:50 1992 Karl Berry (karl@hayley) - - * regex.h (RE_SYNTAX{_POSIX,}_AWK): new definitions from Arnold. - -Sun Mar 15 07:34:30 1992 Karl Berry (karl at hayley) - - * GNUmakefile (dist): versionize regex.{c,h,texinfo}. - -Tue Mar 10 07:05:38 1992 Karl Berry (karl at hayley) - - * Version 0.4. 
- - * xregex.c (PUSH_FAILURE_POINT): always increment the failure id. - (DEBUG_STATEMENT) [DEBUG]: execute the statement even if `debug'==0. - - * xregex.c (pop_failure_point): if the saved string location is - null, keep the current value. - (re_match_2): at fail, test for a dummy failure point by - checking the restored pattern value, not string value. - (re_match_2): new case, `on_failure_keep_string_jump'. - (regex_compile): output this opcode in the .*\n case. - * regexinc.c (re_opcode_t): define the opcode. - (partial_compiled_pattern_pattern): add the new case. - -Mon Mar 9 09:09:27 1992 Karl Berry (karl at hayley) - - * xregex.c (regex_compile): optimize .*\n to output an - unconditional jump to the ., instead of pushing failure points - each time through the loop. - - * xregex.c (DOUBLE_FAILURE_STACK): compute the maximum size - ourselves (and correctly); change callers. - -Sun Mar 8 17:07:46 1992 Karl Berry (karl at hayley) - - * xregex.c (failure_stack_elt_t): change to `const char *', to - avoid warnings. - - * regex.h (re_set_syntax): declare this. - - * xregex.c (pop_failure_point) [DEBUG]: conditionally pass the - original strings and sizes; change callers. - -Thu Mar 5 16:35:35 1992 Karl Berry (karl at claude.cs.umb.edu) - - * xregex.c (regnum_t): new type for register/group numbers. - (compile_stack_elt_t, regex_compile): use it. - - * xregex.c (regexec): declare len as `int' to match re_search. - - * xregex.c (re_match_2): don't declare p1 twice. - - * xregex.c: change `while (1)' to `for (;;)' to avoid silly - compiler warnings. - - * regex.h [__STDC__]: use #if, not #ifdef. - - * regexinc.c (REGEX_REALLOCATE): cast the result of alloca to - (char *), to avoid warnings. - - * xregex.c (regerror): declare variable as const. - - * xregex.c (re_compile_pattern, re_comp): define as returning a const - char *. - * regex.h (re_compile_pattern, re_comp): likewise. 
- -Thu Mar 5 15:57:56 1992 Karl Berry (karl@hal) - - * xregex.c (regcomp): declare `syntax' as unsigned. - - * xregex.c (re_match_2): try to avoid compiler warnings about - unsigned comparisons. - - * GNUmakefile (test-xlc): new target. - - * regex.h (reg_errcode_t): remove trailing comma from definition. - * regexinc.c (re_opcode_t): likewise. - -Thu Mar 5 06:56:07 1992 Karl Berry (karl at hayley) - - * GNUmakefile (dist): add version numbers automatically. - (versionfiles): new variable. - (regex.{c,texinfo}): don't add version numbers here. - * regex.h: put in placeholder instead of the version number. - -Fri Feb 28 07:11:33 1992 Karl Berry (karl at hayley) - - * xregex.c (re_error_msg): declare const, since it is. - -Sun Feb 23 05:41:57 1992 Karl Berry (karl at fosse) - - * xregex.c (PAT_PUSH{,_2,_3}, ...): cast args to avoid warnings. - (regex_compile, regexec): return REG_NOERROR, instead - of 0, on success. - (boolean): define as char, and #define false and true. - * regexinc.c (STREQ): cast the result. - -Sun Feb 23 07:45:38 1992 Karl Berry (karl at hayley) - - * GNUmakefile (test-cc, test-hc, test-pcc): new targets. - - * regex.inc (extract_number, extract_number_and_incr) [DEBUG]: - only define if we are debugging. - - * xregex.c [_AIX]: do #pragma alloca first if necessary. - * regexinc.c [_AIX]: remove the #pragma from here. - - * regex.h (reg_syntax_t): declare as unsigned, and redo the enum - as #define's again. Some compilers do stupid things with enums. - -Thu Feb 20 07:19:47 1992 Karl Berry (karl at hayley) - - * Version 0.3. - - * xregex.c, regex.h (newline_anchor_match_p): rename to - `newline_anchor'; dumb idea to change the name. - -Tue Feb 18 07:09:02 1992 Karl Berry (karl at hayley) - - * regexinc.c: go back to original, i.e., don't include - <string.h> or define strchr. - * xregex.c (regexec): don't bother with adding characters after - newlines to the fastmap; instead, just don't use a fastmap. 
- * xregex.c (regcomp): set the buffer and fastmap fields to zero. - - * xregex.texinfo (GNU r.e. compiling): have to initialize more - than two fields. - - * regex.h (struct re_pattern_buffer): rename `newline_anchor' to - `newline_anchor_match_p', as we're back to two cases. - * xregex.c (regcomp, re_compile_pattern, re_comp): change - accordingly. - (re_match_2): at begline and endline, POSIX is not a special - case anymore; just check newline_anchor_match_p. - -Thu Feb 13 16:29:33 1992 Karl Berry (karl at hayley) - - * xregex.c (*empty_string*): rename to *null_string*, for brevity. - -Wed Feb 12 06:36:22 1992 Karl Berry (karl at hayley) - - * xregex.c (re_compile_fastmap): at endline, don't set fastmap['\n']. - (re_match_2): rewrite the begline/endline cases to take account - of the new field newline_anchor. - -Tue Feb 11 14:34:55 1992 Karl Berry (karl at hayley) - - * regexinc.c [!USG etc.]: include <strings.h> and define strchr - as index. - - * xregex.c (re_search_2): when searching backwards, declare `c' - as a char and use casts when using it as an array subscript. - - * xregex.c (regcomp): if REG_NEWLINE, set - RE_HAT_LISTS_NOT_NEWLINE. Set the `newline_anchor' field - appropriately. - (regex_compile): compile [^...] as matching a \n according to - the syntax bit. - (regexec): if doing REG_NEWLINE stuff, compile a fastmap and add - characters after any \n's to the newline. - * regex.h (RE_HAT_LISTS_NOT_NEWLINE): new syntax bit. - (struct re_pattern_buffer): rename `posix_newline' to - `newline_anchor', define constants for its values. - -Mon Feb 10 07:22:50 1992 Karl Berry (karl at hayley) - - * xregex.c (re_compile_fastmap): combine the code at the top and - bottom of the loop, as it's essentially identical. - -Sun Feb 9 10:02:19 1992 Karl Berry (karl at hayley) - - * xregex.texinfo (POSIX Translate Tables): remove this, as it - doesn't match the spec. 
- - * xregex.c (re_compile_fastmap): if we finish off a path, go - back to the top (to set can_be_null) instead of returning - immediately. - - * xregex.texinfo: changes from bob. - -Sat Feb 1 07:03:25 1992 Karl Berry (karl at hayley) - - * xregex.c (re_search_2): doc fix (from rms). - -Fri Jan 31 09:52:04 1992 Karl Berry (karl at hayley) - - * xregex.texinfo (GNU Searching): clarify the range arg. - - * xregex.c (re_match_2, at_endline_op_p): add extra parens to - get rid of GCC 2's (silly, IMHO) warning about && within ||. - - * xregex.c (common_op_match_empty_string_p): use - MATCH_NOTHING_UNSET_VALUE, not -1. - -Thu Jan 16 08:43:02 1992 Karl Berry (karl at hayley) - - * xregex.c (SET_REGS_MATCHED): only set the registers from - lowest to highest. - - * regexinc.c (MIN): new macro. - * xregex.c (re_match_2): only check min (num_regs, - regs->num_regs) when we set the returned regs. - - * xregex.c (re_match_2): set registers after the first - num_regs to -1 before we return. - -Tue Jan 14 16:01:42 1992 Karl Berry (karl at hayley) - - * xregex.c (re_match_2): initialize max (RE_NREGS, re_nsub + 1) - registers (from rms). - - * xregex.c, regex.h: don't abbreviate `19xx' to `xx'. - - * regexinc.c [!emacs]: include <sys/types.h> before <unistd.h>. - (from ro@thp.Uni-Koeln.DE). - -Thu Jan 9 07:23:00 1992 Karl Berry (karl at hayley) - - * xregex.c (*unmatchable): rename to `match_empty_string_p'. - (CAN_MATCH_NOTHING): rename to `REG_MATCH_EMPTY_STRING_P'. - - * regexinc.c (malloc, realloc): remove prototypes, as they can - cause clashes (from rms). - -Mon Jan 6 12:43:24 1992 Karl Berry (karl at claude.cs.umb.edu) - - * Version 0.2. - -Sun Jan 5 10:50:38 1992 Karl Berry (karl at hayley) - - * xregex.texinfo: bring more or less up-to-date. - * GNUmakefile (regex.texinfo): generate from regex.h and - xregex.texinfo. - * include.awk: new file. - - * xregex.c: change all calls to the fn extract_number_and_incr - to the macro. 
- - * xregex.c (re_match_2) [emacs]: in at_dot, use PTR_CHAR_POS + 1, - instead of bf_* and sl_*. Cast d to unsigned char *, to match - the declaration in Emacs' buffer.h. - [emacs19]: in before_dot, at_dot, and after_dot, likewise. - - * regexinc.c: unconditionally include <sys/types.h>. - - * regexinc.c (alloca) [!alloca]: Emacs config files sometimes - define this, so don't define it if it's already defined. - -Sun Jan 5 06:06:53 1992 Karl Berry (karl at fosse) - - * xregex.c (re_comp): fix type conflicts with regex_compile (we - haven't been compiling this). - - * regexinc.c (SIGN_EXTEND_CHAR): use `__CHAR_UNSIGNED__', not - `CHAR_UNSIGNED'. - - * regexinc.c (NULL) [!NULL]: define it (as zero). - - * regexinc.c (extract_number): remove the temporaries. - -Sun Jan 5 07:50:14 1992 Karl Berry (karl at hayley) - - * regex.h (regerror) [!__STDC__]: return a size_t, not a size_t *. - - * xregex.c (PUSH_FAILURE_POINT, ...): declare `destination' as - `char *' instead of `void *', to match alloca declaration. - - * xregex.c (regerror): use `size_t' for the intermediate values - as well as the return type. - - * xregex.c (regexec): cast the result of malloc. - - * xregex.c (regexec): don't initialize `private_preg' in the - declaration, as old C compilers can't do that. - - * xregex.c (main) [test]: declare printchar void. - - * xregex.c (assert) [!DEBUG]: define this to do nothing, and - remove #ifdef DEBUG's from around asserts. - - * xregex.c (re_match_2): remove error message when not debugging. - -Sat Jan 4 09:45:29 1992 Karl Berry (karl at hayley) - - * other.c: test the bizarre duplicate case in re_compile_fastmap - that I just noticed. - - * test.c (general_test): don't test registers beyond the end of - correct_regs, as well as regs. - - * xregex.c (regex_compile): at handle_close, don't assign to - *inner_group_loc if we didn't push a start_memory (because the - group number was too big). In fact, don't push or pop the - inner_group_offset in that case. 
- - * regex.c: rename to xregex.c, since it's not the whole thing. - * regex.texinfo: likewise. - * GNUmakefile: change to match. - - * regex.c [DEBUG]: only include <stdio.h> if debugging. - - * regexinc.c (SIGN_EXTEND_CHAR) [CHAR_UNSIGNED]: if it's already - defined, don't redefine it. - - * regex.c: define _GNU_SOURCE at the beginning. - * regexinc.c (isblank) [!isblank]: define it. - (isgraph) [!isgraph]: change conditional to this, and remove the - sequent stuff. - - * regex.c (regex_compile): add `blank' character class. - - * regex.c (regex_compile): don't use a uchar variable to loop - through all characters. - - * regex.c (regex_compile): at '[', improve logic for checking - that we have enough space for the charset. - - * regex.h (struct re_pattern_buffer): declare translate as char - * again. We only use it as an array subscript once, I think. - - * regex.c (TRANSLATE): new macro to cast the data character - before subscripting. - (num_internal_regs): rename to `num_regs'. - -Fri Jan 3 07:58:01 1992 Karl Berry (karl at hayley) - - * regex.h (struct re_pattern_buffer): declare `allocated' and - `used' as unsigned long, since these are never negative. - - * regex.c (compile_stack_element): rename to compile_stack_elt_t. - (failure_stack_element): similarly. - - * regexinc.c (TALLOC, RETALLOC): new macros to simplify - allocation of arrays. - - * regex.h (re_*) [__STDC__]: don't declare string args unsigned - char *; that makes them incompatible with string constants. - (struct re_pattern_buffer): declare the pattern and translate - table as unsigned char *. - * regex.c (most routines): use unsigned char vs. char consistently. - - * regex.h (re_compile_pattern): do not declare the length arg as - const. - * regex.c (re_compile_pattern): likewise. - - * regex.c (POINTER_TO_REG): rename to `POINTER_TO_OFFSET'. - - * regex.h (re_registers): declare `start' and `end' as - `regoff_t', instead of `int'. 
- - * regex.c (regexec): if either of the malloc's for the register - information fail, return failure. - - * regex.h (RE_NREGS): define this again, as 30 (from jla). - (RE_ALLOCATE_REGISTERS): remove this. - (RE_SYNTAX_*): remove it from definitions. - (re_pattern_buffer): remove `return_default_num_regs', add - `caller_allocated_regs'. - * regex.c (re_compile_pattern): clear no_sub and - caller_allocated_regs in the pattern. - (regcomp): set caller_allocated_regs. - (re_match_2): do all register allocation at the end of the - match; implement new semantics. - - * regex.c (MAX_REGNUM): new macro. - (regex_compile): at handle_open and handle_close, if the group - number is too large, don't push the start/stop memory. - -Thu Jan 2 07:56:10 1992 Karl Berry (karl at hayley) - - * regex.c (re_match_2): if the back reference is to a group that - never matched, then goto fail, not really_fail. Also, don't - test if the pattern can match the empty string. Why did we - ever do that? - (really_fail): this label no longer needed. - - * regexinc.c [STDC_HEADERS]: use only this to test if we should - include <stdlib.h>. - - * regex.c (DO_RANGE, regex_compile): translate in all cases - except the single character after a \. - - * regex.h (RE_AWK_CLASS_HACK): rename to - RE_BACKSLASH_ESCAPE_IN_LISTS. - * regex.c (regex_compile): change use. - - * regex.c (re_compile_fastmap): do not translate the characters - again; we already translated them at compilation. (From ylo@ngs.fi.) - - * regex.c (re_match_2): in case for at_dot, invert sense of - comparison and find the character number properly. (From - worley@compass.com.) - (re_match_2) [emacs]: remove the cases for before_dot and - after_dot, since there's no way to specify them, and the code - is wrong (judging from this change). 
- -Wed Jan 1 09:13:38 1992 Karl Berry (karl at hayley) - - * psx-{interf,basic,extend}.c, other.c: set `t' as the first - thing, so that if we run them in succession, general_test's - kludge to see if we're doing POSIX tests works. - - * test.h (test_type): add `all_test'. - * main.c: add case for `all_test'. - - * regexinc.c (partial_compiled_pattern_printer, - double_string_printer): don't print anything if we're passed null. - - * regex.c (PUSH_FAILURE_POINT): do not scan for the highest and - lowest active registers. - (re_match_2): compute lowest/highest active regs at start_memory and - stop_memory. - (NO_{LOW,HIGH}EST_ACTIVE_REG): new sentinel values. - (pop_failure_point): return the lowest/highest active reg values - popped; change calls. - - * regex.c [DEBUG]: include <assert.h>. - (various routines) [DEBUG]: change conditionals to assertions. - - * regex.c (DEBUG_STATEMENT): new macro. - (PUSH_FAILURE_POINT): use it to increment num_regs_pushed. - (re_match_2) [DEBUG]: only declare num_regs_pushed if DEBUG. - - * regex.c (*can_match_nothing): rename to *unmatchable. - - * regex.c (re_match_2): at stop_memory, adjust argument reading. - - * regex.h (re_pattern_buffer): declare `can_be_null' as a 2-bit - bit field. - - * regex.h (re_pattern_buffer): declare `buffer' unsigned char *; - no, dumb idea. The pattern can have signed number. - - * regex.c (re_match_2): in maybe_pop_jump case, skip over the - right number of args to the group operators, and don't do - anything with endline if posix_newline is not set. - - * regex.c, regexinc.c (all the things we just changed): go back - to putting the inner group count after the start_memory, - because we need it in the on_failure_jump case in re_match_2. - But leave it after the stop_memory also, since we need it - there in re_match_2, and we don't have any way of getting back - to the start_memory. - - * regexinc.c (partial_compiled_pattern_printer): adjust argument - reading for start/stop_memory.
- * regex.c (re_compile_fastmap, group_can_match_nothing): likewise. - -Tue Dec 31 10:15:08 1991 Karl Berry (karl at hayley) - - * regex.c (bits list routines): remove these. - (re_match_2): get the number of inner groups from the pattern, - instead of keeping track of it at start and stop_memory. - Put the count after the stop_memory, not after the - start_memory. - (compile_stack_element): remove `fixup_inner_group' member, - since we now put it in when we can compute it. - (regex_compile): at handle_open, don't push the inner group - offset, and at handle_close, don't pop it. - - * regex.c (level routines): remove these, and their uses in - regex_compile. This was another manifestation of having to find - $'s that were endlines. - - * regex.c (regexec): this does searching, not matching (a - well-disguised part of the standard). So rewrite to use - `re_search' instead of `re_match'. - * psx-interf.c (test_regexec): add tests to, uh, match. - - * regex.h (RE_TIGHT_ALT): remove this; nobody uses it. - * regex.c: remove the code that was supposed to implement it. - - * other.c (test_others): ^ and $ never match newline characters; - RE_CONTEXT_INVALID_OPS doesn't affect anchors. - - * psx-interf.c (test_regerror): update for new error messages. - - * psx-extend.c: it's now ok to have an alternative be just a $, - so remove all the tests which supposed that was invalid. - -Wed Dec 25 09:00:05 1991 Karl Berry (karl at hayley) - - * regex.c (regex_compile): in handle_open, don't skip over ^ and - $ when checking for an empty group. POSIX has changed the - grammar. - * psx-extend.c (test_posix_extended): thus, move (^$) tests to - valid section. - - * regexinc.c (boolean): move from here to test.h and regex.c. - * test files: declare verbose, omit_register_tests, and - test_should_match as boolean. - - * psx-interf.c (test_posix_c_interface): remove the `c_'. - * main.c: likewise. 
- - * psx-basic.c (test_posix_basic): ^ ($) is an anchor after - (before) an open (close) group. - - * regex.c (re_match_2): in endline, correct precedence of - posix_newline condition. - -Tue Dec 24 06:45:11 1991 Karl Berry (karl at hayley) - - * test.h: incorporate private-tst.h. - * test files: include test.h, not private-tst.h. - - * test.c (general_test): set posix_newline to zero if we are - doing POSIX tests (unfortunately, it's difficult to call - regcomp in this case, which is what we should really be doing). - - * regex.h (reg_syntax_t): make this an enumeration type which - defines the syntax bits; renames re_syntax_t. - - * regex.c (at_endline_op_p): don't preincrement p; then if it's - not an empty string op, we lose. - - * regex.h (reg_errcode_t): new enumeration type of the error - codes. - * regex.c (regex_compile): return that type. - - * regex.c (regex_compile): in [, initialize - just_had_a_char_class to false; somehow I had changed this to - true. - - * regex.h (RE_NO_CONSECUTIVE_REPEATS): remove this, since we - don't use it, and POSIX doesn't require this behavior anymore. - * regex.c (regex_compile): remove it from here. - - * regex.c (regex_compile): remove the no_op insertions for - verify_and_adjust_endlines, since that doesn't exist anymore. - - * regex.c (regex_compile) [DEBUG]: use printchar to print the - pattern, so unprintable bytes will print properly. - - * regex.c: move re_error_msg back. - * test.c (general_test): print the compile error if the pattern - was invalid. - -Mon Dec 23 08:54:53 1991 Karl Berry (karl at hayley) - - * regexinc.c: move re_error_msg here. - - * regex.c (re_error_msg): the ``message'' for success must be - NULL, to keep the interface to re_compile_pattern the same. - (regerror): if the msg is null, use "Success". - - * rename most test files for consistency. Change Makefile - correspondingly. - - * test.c (most routines): add casts to (unsigned char *) when we - call re_{match,search}{,_2}. 
- -Sun Dec 22 09:26:06 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): declare string args as unsigned char * - again; don't declare non-pointer args const; declare the - pattern buffer const. - (re_match): likewise. - (re_search_2, re_search): likewise, except don't declare the - pattern const, since we make a fastmap. - * regex.h [__STDC__]: change prototypes. - - * regex.c (regex_compile): return an error code, not a string. - (re_err_list): new table to map from error codes to string. - (re_compile_pattern): return an element of re_err_list. - (regcomp): don't test all the strings. - (regerror): just use the list. - (put_in_buffer): remove this. - - * regex.c (equivalent_failure_points): remove this. - - * regex.c (re_match_2): don't copy the string arguments into - non-const pointers. We never alter the data. - - * regex.c (re_match_2): move assignment to `is_a_jump_n' out of - the main loop. Just initialize it right before we do - something with it. - - * regex.[ch] (re_match_2): don't declare the int parameters const. - -Sat Dec 21 08:52:20 1991 Karl Berry (karl at hayley) - - * regex.h (re_syntax_t): new type; declare to be unsigned - (previously we used int, but since we do bit operations on - this, unsigned is better, according to H&S). - (obscure_syntax, re_pattern_buffer): use that type. - * regex.c (re_set_syntax, regex_compile): likewise. - - * regex.h (re_pattern_buffer): new field `posix_newline'. - * regex.c (re_comp, re_compile_pattern): set to zero. - (regcomp): set to REG_NEWLINE. - * regex.h (RE_HAT_LISTS_NOT_NEWLINE): remove this (we can just - check `posix_newline' instead.) - - * regex.c (op_list_type, op_list, add_op): remove these. - (verify_and_adjust_endlines): remove this. - (pattern_offset_list_type, *pattern_offset* routines): and these. - These things all implemented the nonleading/nontrailing position - code, which was very long, had a few remaining problems, and - is no longer needed. So... 
- - * regexinc.c (STREQ): new macro to abbreviate strcmp(,)==0, for - brevity. Change various places in regex.c to use it. - - * regex{,inc}.c (enum regexpcode): change to a typedef - re_opcode_t, for brevity. - - * regex.h (re_syntax_table) [SYNTAX_TABLE]: remove this; it - should only be in regex.c, I think, since we don't define it - in this case. Maybe it should be conditional on !SYNTAX_TABLE? - - * regexinc.c (partial_compiled_pattern_printer): simplify and - distinguish the emacs/not-emacs (not)wordchar cases. - -Fri Dec 20 08:11:38 1991 Karl Berry (karl at hayley) - - * regexinc.c (regexpcode) [emacs]: only define the Emacs opcodes - if we are ifdef emacs. - - * regex.c (BUF_PUSH*): rename to PAT_PUSH*. - - * regex.c (regex_compile): in $ case, go back to essentially the - original code for deciding endline op vs. normal char. - (at_endline_op_p): new routine. - * regex.h (RE_ANCHORS_ONLY_AT_ENDS, RE_CONTEXT_INVALID_ANCHORS, - RE_REPEATED_ANCHORS_AWAY, RE_NO_ANCHOR_AT_NEWLINE): remove - these. POSIX has simplified the rules for anchors in draft - 11.2. - (RE_NEWLINE_ORDINARY): new syntax bit. - (RE_CONTEXT_INDEP_ANCHORS): change description to be compatible - with POSIX. - * regex.texinfo (Syntax Bits): remove the descriptions. - -Mon Dec 16 08:12:40 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): in jump_past_next_alt, unconditionally - goto no_pop. The only register we were finding was one which - enclosed the whole alternative expression, not one around an - individual alternative. So we were never doing what we - thought we were doing, and this way makes (|a) against the - empty string fail. - - * regex.c (regex_compile): remove `highest_ever_regnum', and - don't restore regnum from the stack; just put it into a - temporary to put into the stop_memory. Otherwise, groups - aren't numbered consecutively. - - * regex.c (is_in_compile_stack): rename to - `group_in_compile_stack'; remove unnecessary test for the - stack being empty. 
- - * regex.c (re_match_2): in on_failure_jump, skip no_op's before - checking for the start_memory, in case we were called from - succeed_n. - -Sun Dec 15 16:20:48 1991 Karl Berry (karl at hayley) - - * regex.c (regex_compile): in duplicate case, use - highest_ever_regnum instead of regnum, since the latter is - reverted at stop_memory. - - * regex.c (re_match_2): in on_failure_jump, if the * applied to - a group, save the information for that group and all inner - groups (by making it active), even though we're not inside it - yet. - -Sat Dec 14 09:50:59 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_ITEM, POP_FAILURE_ITEM): new macros. - Use them instead of copying the stack manipulating a zillion - times. - - * regex.c (PUSH_FAILURE_POINT, pop_failure_point) [DEBUG]: save - and restore a unique identification value for each failure point. - - * regexinc.c (partial_compiled_pattern_printer): don't print an - extra / after duplicate commands. - - * regex.c (regex_compile): in back-reference case, allow a back - reference to register `regnum'. Otherwise, even `\(\)\1' - fails, since regnum is 1 at the back-reference. - - * regex.c (re_match_2): in fail, don't examine the pattern if we - restored to pend. - - * test_private.h: rename to private_tst.h. Change includes. - - * regex.c (extend_bits_list): compute existing size for realloc - in bytes, not blocks. - - * regex.c (re_match_2): in jump_past_next_alt, the for loop was - missing its (empty) statement. Even so, some register tests - still fail, although in a different way than in the previous change. - -Fri Dec 13 15:55:08 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): in jump_past_next_alt, unconditionally - goto no_pop, since we weren't properly detecting if the - alternative matched something anyway. No, we need to not jump - to keep the register values correct; just change to not look at - register zero and not test RE_NO_EMPTY_ALTS (which is a - compile-time thing). 
- - * regex.c (SET_REGS_MATCHED): start the loop at 1, since we never - care about register zero until the very end. (I think.) - - * regex.c (PUSH_FAILURE_POINT, pop_failure_point): go back to - pushing and popping the active registers, instead of only doing - the registers before a group: (fooq|fo|o)*qbar against fooqbar - fails, since we restore back into the middle of group 1, yet it - isn't active, because the previous restore clobbered the active flag. - -Thu Dec 12 17:25:36 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): do not call - `equivalent_failure_points' after all; it causes the registers - to be ``wrong'' (according to POSIX), and an infinite loop on - `((a*)*)*' against `ab'. - - * regex.c (re_compile_fastmap): don't push `pend' on the failure - stack. - -Tue Dec 10 10:30:03 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): if pushing same failure point that - is on the top of the stack, fail. - (equivalent_failure_points): new routine. - - * regex.c (re_match_2): add debug statements for every opcode we - execute. - - * regex.c (regex_compile/handle_close): restore - `fixup_inner_group_count' and `regnum' from the stack. - -Mon Dec 9 13:51:15 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): declare `this_reg' as int, so - unsigned arithmetic doesn't happen when we don't want to save - the registers. - -Tue Dec 3 08:11:10 1991 Karl Berry (karl at hayley) - - * regex.c (extend_bits_list): divide size by bits/block. - - * regex.c (init_bits_list): remove redundant assignment to - `bits_list_ptr'. - - * regexinc.c (partial_compiled_pattern_printer): don't do *p++ - twice in the same expr. - - * regex.c (re_match_2): at on_failure_jump, use the correct - pattern positions for getting the stuff following the start_memory. - - * regex.c (struct register_info): remove the bits_list for the - inner groups; make that a separate variable.
- -Mon Dec 2 10:42:07 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): don't pass `failure_stack' as an - arg; change callers. - - * regex.c (PUSH_FAILURE_POINT): print items in order they are - pushed. - (pop_failure_point): likewise. - - * regex.c (main): prompt for the pattern and string. - - * regex.c (FREE_VARIABLES) [!REGEX_MALLOC]: declare as nothing; - remove #ifdefs from around calls. - - * regex.c (extract_number, extract_number_and_incr): declare static. - - * regex.c: remove the canned main program. - * main.c: new file. - * Makefile (COMMON): add main.o. - -Tue Sep 24 06:26:51 1991 Kathy Hargreaves (kathy at fosse) - - * regex.c (re_match_2): Made `pend' and `dend' not register variables. - Only set string2 to string1 if string1 isn't null. - Send address of p, d, regstart, regend, and reg_info to - pop_failure_point. - Put in more debug statements. - - * regex.c [debug]: Added global variable. - (DEBUG_*PRINT*): Only print if `debug' is true. - (DEBUG_DOUBLE_STRING_PRINTER): Changed DEBUG_STRING_PRINTER's - name to this. - Changed some comments. - (PUSH_FAILURE_POINT): Moved and added some debugging statements. - Was saving regstart on the stack twice instead of saving both - regstart and regend; remedied this. - [NUM_REGS_ITEMS]: Changed from 3 to 4, as now save lowest and - highest active registers instead of highest used one. - [NUM_NON_REG_ITEMS]: Changed name of NUM_OTHER_ITEMS to this. - (NUM_FAILURE_ITEMS): Use active registers instead of number 0 - through highest used one. - (re_match_2): Have pop_failure_point put things in the variables. - (pop_failure_point): Have it do what the fail case in re_match_2 - did with the failure stack, instead of throwing away the stuff - popped off. re_match_2 can ignore results when it doesn't - need them. - - -Thu Sep 5 13:23:28 1991 Kathy Hargreaves (kathy at fosse) - - * regex.c (banner): Changed copyright years to be separate. 
- - * regex.c [CHAR_UNSIGNED]: Put __ at both ends of this name. - [DEBUG, debug_count, *debug_p, DEBUG_PRINT_1, DEBUG_PRINT_2, - DEBUG_COMPILED_PATTERN_PRINTER ,DEBUG_STRING_PRINTER]: - defined these for debugging. - (extract_number): Added this (debuggable) routine version of - the macro EXTRACT_NUMBER. Ditto for EXTRACT_NUMBER_AND_INCR. - (re_compile_pattern): Set return_default_num_regs if the - syntax bit RE_ALLOCATE_REGISTERS is set. - [REGEX_MALLOC]: Renamed USE_ALLOCA to this. - (BUF_POP): Got rid of this, as don't ever use it. - (regex_compile): Made the type of `pattern' not be register. - If DEBUG, print the pattern to compile. - (re_match_2): If had a `$' in the pattern before a `^' then - don't record the `^' as an anchor. - Put (enum regexpcode) before references to b, as suggested - [RE_NO_BK_BRACES]: Changed RE_NO_BK_CURLY_BRACES to this. - (remove_pattern_offset): Removed this unused routine. - (PUSH_FAILURE_POINT): Changed to only save active registers. - Put in debugging statements. - (re_compile_fastmap): Made `pattern' not a register variable. - Use routine for extracting numbers instead of macro. - (re_match_2): Made `p', `mcnt' and `mcnt2' not register variables. - Added `num_regs_pushed' for debugging. - Only malloc registers if the syntax bit RE_ALLOCATE_REGISTERS is set. - Put in debug statements. - Put the macro NOTE_INNER_GROUP's code inline, as it was the - only called in one place. - For debugging, extract numbers using routines instead of macros. - In case fail: only restore pushed active registers, and added - debugging statements. - (pop_failure_point): Test for underfull stack. - (group_can_match_nothing, common_op_can_match_nothing): For - debugging, extract numbers using routines instead of macros. - (regexec): Changed formal parameters to not be prototypes. - Don't initialize `regs' or `private_preg' in their declarations. 
- -Tue Jul 23 18:38:36 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h [RE_CONTEXT_INDEP_OPS]: Moved the anchor stuff out of - this bit. - [RE_UNMATCHED_RIGHT_PAREN_ORD]: Defined this bit. - [RE_CONTEXT_INVALID_ANCHORS]: Defined this bit. - [RE_CONTEXT_INDEP_ANCHORS]: Defined this bit. - Added RE_CONTEXT_INDEP_ANCHORS to all syntaxes which had - RE_CONTEXT_INDEP_OPS. - Took RE_ANCHORS_ONLY_AT_ENDS out of the POSIX basic syntax. - Added RE_UNMATCHED_RIGHT_PAREN_ORD to the POSIX extended - syntax. - Took RE_REPEATED_ANCHORS_AWAY out of the POSIX extended syntax. - Defined REG_NOERROR (which will probably have to go away again). - Changed the type `off_t' to `regoff_t'. - - * regex.c: Changed some comments. - (regex_compile): Added variable `had_an_endline' to keep track - of if hit a `$' since the beginning of the pattern or the last - alternative (if any). - Changed RE_CONTEXT_INVALID_OPS and RE_CONTEXT_INDEP_OPS to - RE_CONTEXT_INVALID_ANCHORS and RE_CONTEXT_INDEP_ANCHORS where - appropriate. - Put a `no_op' in the pattern if a repeat is only zero or one - times; in this case and if it is many times (whereupon a jump - backwards is pushed instead), keep track of the operator for - verify_and_adjust_endlines. - If RE_UNMATCHED_RIGHT_PAREN is set, make an unmatched - close-group operator match `)'. - Changed all error exits to exit (1). - (remove_pattern_offset): Added this routine, but don't use it. - (verify_and_adjust_endlines): At top of routine, if initialize - routines run out of memory, return true after setting - enough_memory false. - At end of endline, et al. case, don't set *p to no_op. - Repetition operators also set the level and active groups' - match statuses, unless RE_REPEATED_ANCHORS_AWAY is set. - (get_group_match_status): Put a return in front of call to get_bit. - (re_compile_fastmap): Changed is_a_succeed_n to a boolean. - If at end of pattern, then if the failure stack isn't empty, - go back to the failure point.
- In *jump* case, only pop the stack if what's on top of it is - where we've just jumped to. - (re_search_2): Return -2 instead of val if val is -2. - (group_can_match_nothing, alternative_can_match_nothing, - common_op_can_match_nothing): Now pass in reg_info for the - `duplicate' case. - (re_match_2): Don't skip over the next alternative also if - empty alternatives aren't allowed. - In fail case, if failed to a backwards jump that's part of a - repetition loop, pop the current failure point and use the - next one. - (pop_failure_point): Check that there's as many register items - on the failure stack as the stack says there are. - (common_op_can_match_nothing): Added variables `ret' and - `reg_no' so can set reg_info for the group encountered. - Also break without doing anything if hit a no_op or the other - kinds of `endline's. - If not done already, set reg_info in start_memory case. - Put in no_pop_jump for an optimized succeed_n of zero repetitions. - In succeed_n case, if the number isn't zero, then return false. - Added `duplicate' case. - -Sat Jul 13 11:27:38 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (REG_NOERROR): Added this error code definition. - - * regex.c: Took some redundant parens out of macros. - (enum regexpcode): Added jump_past_next_alt. - Wrapped some macros in `do..while (0)'. - Changed some comments. - (regex_compile): Use `fixup_alt_jump' instead of `fixup_jump'. - Use `maybe_pop_jump' instead of `maybe_pop_failure_jump'. - Use `jump_past_next_alt' instead of `no_pop_jump' when at the - end of an alternative. - (re_match_2): Used REGEX_ALLOCATE for the registers stuff. - In stop_memory case: Add more boolean tests to see if the - group is in a loop. - Added jump_past_next_alt case, which doesn't jump over the - next alternative if the last one didn't match anything. - Unfortunately, to make this work with, e.g., `(a+?*|b)*' - against `bb', I also had to pop the alternative's failure - point, which in turn broke backtracking!
- In fail case: Detect a dummy failure point by looking at - failure_stack.avail - 2, not stack[-2]. - (pop_failure_point): Only pop if the stack isn't empty; don't - give an error if it is. (Not sure yet this is correct.) - (group_can_match_nothing): Make it return a boolean instead of int. - Make it take an argument indicating the end of where it should look. - If find a group that can match nothing, set the pointer - argument to past the group in the pattern. - Took out cases which can share with alternative_can_match_nothing - and call common_op_can_match_nothing. - Took ++ out of switch, so could call common_op_can_match_nothing. - Wrote lots more for on_failure_jump case to handle alternatives. - Main loop now doesn't look for matching stop_memory, but - rather the argument END; return true if hit the matching - stop_memory; this way can call itself for inner groups. - (alternative_can_match_nothing): Added for alternatives. - (common_op_can_match_nothing): Added for previous two routines' - common operators. - (regerror): Returns a message saying there's no error if gets - sent REG_NOERROR. - -Wed Jul 3 10:43:15 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c: Removed unnecessary enclosing parens from several macros. - Put `do..while (0)' around a few. - Corrected some comments. - (INIT_FAILURE_STACK_SIZE): Deleted in favor of using - INIT_FAILURE_ALLOC. - (INIT_FAILURE_STACK, DOUBLE_FAILURE_STACK, PUSH_PATTERN_OP, - PUSH_FAILURE_POINT): Made routines of the same name (but with all - lowercase letters) into these macros, so could use `alloca' - when USE_ALLOCA is defined. The reason is stated below for - bits lists. Deleted analogous routines. - (re_compile_fastmap): Added variable void *destination for - PUSH_PATTERN_OP. - (re_match_2): Added variable void *destination for REGEX_REALLOCATE. - Used the failure stack macros in place of the routines. 
- Detected a dummy failure point by inspecting the failure stack's - (avail - 2)th element, not failure_stack.stack[-2]. This bug - arose when used the failure stack macros instead of the routines. - - * regex.c [USE_ALLOCA]: Put this conditional around previous - alloca stuff and defined these to work differently depending - on whether or not USE_ALLOCA is defined: - (REGEX_ALLOCATE): Uses either `alloca' or `malloc'. - (REGEX_REALLOCATE): Uses either `alloca' or `realloc'. - (INIT_BITS_LIST, EXTEND_BITS_LIST, SET_BIT_TO_VALUE): Defined - macro versions of routines with the same name (only with all - lowercase letters) so could use `alloc' in re_match_2. This - is to prevent core leaks when C-g is used in Emacs and to make - things faster and avoid storage fragmentation. These things - have to be macros because the results of `alloca' go away with - the routine by which it's called. - (BITS_BLOCK_SIZE, BITS_BLOCK, BITS_MASK): Moved to above the - above-mentioned macros instead of before the routines defined - below regex_compile. - (set_bit_to_value): Compacted some code. - (reg_info_type): Changed inner_groups field to be bits_list_type - so could be arbitrarily long and thus handle arbitrary nesting. - (NOTE_INNER_GROUP): Put `do...while (0)' around it so could - use as a statement. - Changed code to use bits lists. - Added variable void *destination for REGEX_REALLOCATE (whose call - is several levels in). - Changed variable name of `this_bit' to `this_reg'. - (FREE_VARIABLES): Only define and use if USE_ALLOCA is defined. - (re_match_2): Use REGEX_ALLOCATE instead of malloc. - Instead of setting INNER_GROUPS of reg_info to zero, have to - use INIT_BITS_LIST and return -2 (and free variables if - USE_ALLOCA isn't defined) if it fails. - -Fri Jun 28 13:45:07 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): set value of `dend' when we restore `d'. - - * regex.c: remove declaration of alloca. 
- - * regex.c (MISSING_ISGRAPH): rename to `ISGRAPH_MISSING'. - - * regex.h [_POSIX_SOURCE]: remove these conditionals; always - define POSIX stuff. - * regex.c (_POSIX_SOURCE): change conditionals to use `POSIX' - instead. - -Sat Jun 1 16:56:50 1991 Kathy Hargreaves (kathy at hayley) - - * regex.*: Changed RE_CONTEXTUAL_* to RE_CONTEXT_*, - RE_TIGHT_VBAR to RE_TIGHT_ALT, RE_NEWLINE_OR to - RE_NEWLINE_ALT, and RE_DOT_MATCHES_NEWLINE to RE_DOT_NEWLINE. - -Wed May 29 09:24:11 1991 Karl Berry (karl at hayley) - - * regex.texinfo (POSIX Pattern Buffers): cross-reference the - correct node name (Match-beginning-of-line, not ..._line). - (Syntax Bits): put @code around all syntax bits. - -Sat May 18 16:29:58 1991 Karl Berry (karl at hayley) - - * regex.c (global): add casts to keep broken compilers from - complaining about malloc and realloc calls. - - * regex.c (isgraph) [MISSING_ISGRAPH]: change test to this, - instead of `#ifndef isgraph', since broken compilers can't - have both a macro and a symbol by the same name. - - * regex.c (re_comp, re_exec) [_POSIX_SOURCE]: do not define. - (regcomp, regfree, regexec, regerror) [_POSIX_SOURCE && !emacs]: - only define in this case. - -Mon May 6 17:37:04 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (re_search, re_search_2): Changed BUFFER to not be const. - - * regex.c (re_compile_pattern): `^' is in a leading position if - it precedes a newline. - (various routines): Added or changed header comments. - (double_pattern_offsets_list): Changed name from - `extend_pattern_offsets_list'. - (adjust_pattern_offsets_list): Changed return value from - unsigned to void. - (verify_and_adjust_endlines): Now returns `true' and `false' - instead of 1 and 0. - `$' is in a leading position if it follows a newline. - (set_bit_to_value, get_bit_value): Exit with error if POSITION < 0 - so now calling routines don't have to. 
- (init_failure_stack, inspect_failure_stack_top, - pop_failure_stack_top, push_pattern_op, double_failure_stack): - Now return value unsigned instead of boolean. - (re_search, re_search_2): Changed BUFP to not be const. - (re_search_2): Added variable const `private_bufp' to send to - re_match_2. - (push_failure_point): Made return value unsigned instead of boolean. - -Sat May 4 15:32:22 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (re_compile_fastmap): Added extern for this. - Changed some comments. - - * regex.c (re_compile_pattern): In case handle_bar: put invalid - pattern test before levels matching stuff. - Changed some comments. - Added optimizing test for detecting an empty alternative that - ends with a trailing '$' at the end of the pattern. - (re_compile_fastmap): Moved failure_stack stuff to before this - so could use it. Made its stack dynamic. - Made it return an int so that it could return -2 if its stack - couldn't be allocated. - Added to header comment (about the return values). - (init_failure_stack): Wrote so both re_match_2 and - re_compile_fastmap could use similar stacks. - (double_failure_stack): Added for above reasons. - (push_pattern_op): Wrote for re_compile_fastmap. - (re_search_2): Now return -2 if re_compile_fastmap does. - (re_match_2): Made regstart and regend type failure_stack_element*. - (push_failure_point): Made pattern_place and string_place type - failure_stack_element*. - Call double_failure_stack now. - Return true instead of 1. - -Wed May 1 12:57:21 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (remove_intervening_anchors): Avoid erroneously making - ops into no_op's by making them no_op only when they're beglines. - (verify_and_adjust_endlines): Don't make '$' a normal character - if it's before a newline. - Look for the endline op in *p, not p[1]. - (failure_stack_element): Added this declaration. - (failure_stack_type): Added this declaration.
- (INIT_FAILURE_STACK_SIZE, FAILURE_STACK_EMPTY, - FAILURE_STACK_PTR_EMPTY, REMAINING_AVAIL_SLOTS): Added for - failure stack. - (FAILURE_ITEM_SIZE, PUSH_FAILURE_POINT): Deleted. - (FREE_VARIABLES): Now free failure_stack.stack instead of stackb. - (re_match_2): deleted variables `initial_stack', `stackb', - `stackp', and `stacke' and added `failure_stack' to replace them. - Replaced calls to PUSH_FAILURE_POINT with those to - push_failure_point. - (push_failure_point): Added for re_match_2. - (pop_failure_point): Rewrote to use a failure_stack_type of stack. - (can_match_nothing): Moved definition to below re_match_2. - (bcmp_translate): Moved definition to below re_match_2. - -Mon Apr 29 14:20:54 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (enum regexpcode): Added codes endline_before_newline - and repeated_endline_before_newline so could detect these - types of endlines in the intermediate stages of a compiled - pattern. - (INIT_FAILURE_ALLOC): Renamed NFAILURES to this and set it to 5. - (BUF_PUSH): Put `do {...} while 0' around this. - (BUF_PUSH_2): Defined this to cut down on expansion of EXTEND_BUFFER. - (regex_compile): Changed some comments. - Now push endline_before_newline if find a `$' before a newline - in the pattern. - If a `$' might turn into an ordinary character, set laststart - to point to it. - In '^' case, if syntax bit RE_TIGHT_VBAR is set, then for `^' - to be in a leading position, it must be first in the pattern. - Don't have to check in one of the else clauses that it's not set. - If RE_CONTEXTUAL_INDEP_OPS isn't set but RE_ANCHORS_ONLY_AT_ENDS - is, make '^' a normal character if it isn't first in the pattern. - Can only detect at the end if a '$' after an alternation op is a - trailing one, so can't immediately detect empty alternatives - if a '$' follows a vbar. - Added a picture of the ``success jumps'' in alternatives. - Have to set bufp->used before calling verify_and_adjust_endlines. 
- Also do it before returning all error strings. - (remove_intervening_anchors): Now replaces the anchor with - repeated_endline_before_newline if it's an endline_before_newline. - (verify_and_adjust_endlines): Deleted SYNTAX parameter (could - use bufp's) and added GROUP_FORWARD_MATCH_STATUS so could - detect back references referring to empty groups. - Added variable `bend' to point past the end of the pattern buffer. - Added variable `previous_p' so wouldn't have to reinspect the - pattern buffer to see what op we just looked at. - Added endline_before_newline and repeated_endline_before_newline - cases. - When checking if in a trailing position, added case where '$' - has to be at the pattern's end if either of the syntax bits - RE_ANCHORS_ONLY_AT_ENDS or RE_TIGHT_VBAR are set. - Since `endline' can have the intermediate form `endline_in_repeat', - have to change it to `endline' if RE_REPEATED_ANCHORS_AWAY - isn't set. - Now disallow empty alternatives with trailing endlines in them - if RE_NO_EMPTY_ALTS is set. - Now don't make '$' an ordinary character if it precedes a newline. - Don't make it an ordinary character if it's before a newline. - Back references now affect the level matching something only if - they refer to nonempty groups. - (can_match_nothing): Now increment p1 in the switch, which - changes many of the cases, but makes the code more like what - it was derived from. - Adjust the return statement to reflect above. - (struct register_info): Made `can_match_nothing' field an int - instead of a bit so could have -1 in it if never set. - (MAX_FAILURE_ITEMS): Changed name from MAX_NUM_FAILURE_ITEMS. - (FAILURE_ITEM_SIZE): Defined how much space a failure items uses. - (PUSH_FAILURE_POINT): Changed variable `last_used_reg's name - to `highest_used_reg'. - Added variable `num_stack_items' and changed `len's name to - `stack_length'. - Test failure stack limit in terms of number of items in it, not - in terms of its length. 
rms' fix tested length against number - of items, which was a misunderstanding. - Use `realloc' instead of `alloca' to extend the failure stack. - Use shifts instead of multiplying by 2. - (FREE_VARIABLES): Free `stackb' instead of `initial_stack', as - might may have been reallocated. - (re_match_2): When mallocing `initial_stack', now multiply - the number of items wanted (what was there before) by - FAILURE_ITEM_SIZE. - (pop_failure_point): Need this procedure form of the macro of - the same name for debugging, so left it in and deleted the - macro. - (recomp): Don't free the pattern buffer's translate field. - -Mon Apr 15 09:47:47 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_DUP_MAX): Moved to outside of #ifdef _POSIX_SOURCE. - * regex.c (#include <sys/types.h>): Removed #ifdef _POSIX_SOURCE - condition. - (malloc, realloc): Made return type void* #ifdef __STDC__. - (enum regexpcode): Added endline_in_repeat for the compiler's - use; this never ends up on the final compiled pattern. - (INIT_PATTERN_OFFSETS_LIST_SIZE): Initial size for - pattern_offsets_list_type. - (pattern_offset_type): Type for pattern offsets. - (pattern_offsets_list_type): Type for keeping a list of - pattern offsets. - (anchor_list_type): Changed to above type. - (PATTERN_OFFSETS_LIST_PTR_FULL): Tests if a pattern offsets - list is full. - (ANCHOR_LIST_PTR_FULL): Changed to above. - (BIT_BLOCK_SIZE): Changed to BITS_BLOCK_SIZE and moved to - above bits list routines below regex_compile. - (op_list_type): Defined to be pattern_offsets_list_type. - (compile_stack_type): Changed offsets to be - pattern_offset_type instead of unsigned. - (pointer): Changed the name of all structure fields from this - to `avail'. - (COMPILE_STACK_FULL): Changed so the stack is full if `avail' - is equal to `size' instead of `size' - 1. - (GET_BUFFER_SPACE): Changed `>=' to `>' in the while statement. 
- (regex_compile): Added variable `enough_memory' so could check - that routine that verifies '$' positions could return an - allocation error. - (group_count): Deleted this variable, as `regnum' already does - this work. - (op_list): Added this variable to keep track of operations - needed for verifying '$' positions. - (anchor_list): Now initialize using routine - `init_pattern_offsets_list'. - Consolidated the three bits_list initializations. - In case '$': Instead of trying to go past constructs which can - follow '$', merely detect the special case where it has to be - at the pattern's end, fix up any fixup jumps if necessary, - record the anchor if necessary and add an `endline' (and - possibly two `no-op's) to the pattern; will call a routine at - the end to verify if it's in a valid position or not. - (init_pattern_offsets_list): Added to initialize pattern - offsets lists. - (extend_anchor_list): Renamed this extend_pattern_offsets_list - and renamed parameters and internal variables appropriately. - (add_pattern_offset): Added this routine which both - record_anchor_position and add_op call. - (adjust_pattern_offsets_list): Add this routine to adjust by - some increment all the pattern offsets a list of such after a - given position. - (record_anchor_position): Now send in offset instead of - calculating it and just call add_pattern_offset. - (adjust_anchor_list): Replaced by above routine. - (remove_intervening_anchors): If the anchor is an `endline' - then replace it with `endline_in_repeat' instead of `no_op'. - (add_op): Added this routine to call in regex_compile - wherever push something relevant to verifying '$' positions. - (verify_and_adjust_endlines): Added routine to (1) verify that - '$'s in a pattern buffer (represented by `endline') were in - valid positions and (2) whether or not they were anchors. - (BITS_BLOCK_SIZE): Renamed BIT_BLOCK_SIZE and moved to right - above bits list routines. 
- (BITS_BLOCK): Defines which array element of a bits list the - bit corresponding to a given position is in. - (BITS_MASK): Has a 1 where the bit (in a bit list array element) - for a given position is. - -Mon Apr 1 12:09:06 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (BIT_BLOCK_SIZE): Defined this for using with - bits_list_type, abstracted from level_list_type so could use - for more things than just the level match status. - (regex_compile): Renamed `level_list' variable to - `level_match_status'. - Added variable `group_match_status' of type bits_list_type. - Kept track of whether or not for all groups any of them - matched other than the empty string, so detect if a back - reference in front of a '^' made it nonleading or not. - Do this by setting a match status bit for all active groups - whenever leave a group that matches other than the empty string. - Could detect which groups are active by going through the - stack each time, but or-ing a bits list of active groups with - a bits list of group match status is faster, so make a bits - list of active groups instead. - Have to check that '^' isn't in a leading position before - going to normal_char. - Whenever set level match status of the current level, also set - the match status of all active groups. - Increase the group count and make that group active whenever - open a group. - When close a group, only set the next level down if the - current level matches other than the empty string, and make - the current group inactive. - At a back reference, only set a level's match status if the - group to which the back reference refers matches other than - the empty string. - (init_bits_list): Added to initialize a bits list. - (get_level_value): Deleted this. (Made into - get_level_match_status.) - (extend_bits_list): Added to extend a bits list. (Made this - from deleted routine `extend_level_list'.) - (get_bit): Added to get a bit value from a bits list. 
(Made - this from deleted routine `get_level_value'.) - (set_bit_to_value): Added to set a bit in a bits list. (Made - this from deleted routine `set_level_value'.) - (get_level_match_status): Added this to get the match status - of a given level. (Made from get_level_value.) - (set_this_level, set_next_lower_level): Made all routines - which set bits extend the bits list if necessary, thus they - now return an unsigned value to indicate whether or not the - reallocation failed. - (increase_level): No longer extends the level list. - (make_group_active): Added to mark as active a given group in - an active groups list. - (make_group_inactive): Added to mark as inactive a given group - in an active groups list. - (set_match_status_of_active_groups): Added to set the match - status of all currently active groups. - (get_group_match_status): Added to get a given group's match status. - (no_levels_match_anything): Removed the parameter LEVEL. - (PUSH_FAILURE_POINT): Added rms' bug fix and changed RE_NREGS - to num_internal_regs. - -Sun Mar 31 09:04:30 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_ANCHORS_ONLY_AT_ENDS): Added syntax so could - constrain '^' and '$' to only be anchors if at the beginning - and end of the pattern. - (RE_SYNTAX_POSIX_BASIC): Added the above bit. - - * regex.c (enum regexcode): Changed `unused' to `no_op'. - (this_and_lower_levels_match_nothing): Deleted forward reference. - (regex_compile): case '^': if the syntax bit RE_ANCHORS_ONLY_AT_ENDS - is set, then '^' is only an anchor if at the beginning of the - pattern; only record anchor position if the syntax bit - RE_REPEATED_ANCHORS_AWAY is set; the '^' is a normal char if - the syntax bit RE_ANCHORS_ONLY_AT_ENDS is set and we're not at - the beginning of the pattern (and neither RE_CONTEXTUAL_INDEP_OPS - nor RE_CONTEXTUAL_INDEP_OPS syntax bits are set). - Only adjust the anchor list if the syntax bit - RE_REPEATED_ANCHORS_AWAY is set.
- - * regex.c (level_list_type): Use to detect when '^' is - in a leading position. - (regex_compile): Added level_list_type level_list variable in - which we keep track of whether or not a grouping level (in its - current or most recent incarnation) matches anything besides the - empty string. Set the bit for the i-th level when detect it - should match something other than the empty string and the bit - for the (i-1)-th level when leave the i-th group. Clear all - bits for the i-th and higher levels if none of 0--(i - 1)-th's - bits are set when encounter an alternation operator on that - level. If no levels are set when hit a '^', then it is in a - leading position. We keep track of which level we're at by - increasing a variable current_level whenever we encounter an - open-group operator and decreasing it whenever we encounter a - close-group operator. - Have to adjust the anchor list contents whenever insert - something ahead of them (such as on_failure_jump's) in the - pattern. - (adjust_anchor_list): Adjusts the offsets in an anchor list by - a given increment starting at a given start position. - (get_level_value): Returns the bit setting of a given level. - (set_level_value): Sets the bit of a given level to a given value. - (set_this_level): Sets (to 1) the bit of a given level. - (set_next_lower_level): Sets (to 1) the bit of (LEVEL - 1) for a - given LEVEL. - (clear_this_and_higher_levels): Clears the bits for a given - level and any higher levels. - (extend_level_list): Adds sizeof(unsigned) more bits to a level list. - (increase_level): Increases by 1 the value of a given level variable. - (decrease_level): Decreases by 1 the value of a given level variable. - (lower_levels_match_nothing): Checks if any levels lower than - the given one match anything. - (no_levels_match_anything): Checks if any levels match anything. - (re_match_2): At case wordbeg: before looking at d-1, check that - we're not at the string's beginning. 
- At case wordend: Added some illuminating parentheses. - -Mon Mar 25 13:58:51 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_ANCHOR_AT_NEWLINE): Changed syntax bit name - from RE_ANCHOR_NOT_NEWLINE because an anchor never matches the - newline itself, just the empty string either before or after it. - (RE_REPEATED_ANCHORS_AWAY): Added this syntax bit for ignoring - anchors inside groups which are operated on by repetition - operators. - (RE_DOT_MATCHES_NEWLINE): Added this bit so the match-any-character - operator could match a newline when it's set. - (RE_SYNTAX_POSIX_BASIC): Set RE_DOT_MATCHES_NEWLINE in this. - (RE_SYNTAX_POSIX_EXTENDED): Set RE_DOT_MATCHES_NEWLINE and - RE_REPEATED_ANCHORS_AWAY in this. - (regerror): Changed prototypes to new POSIX spec. - - * regex.c (anchor_list_type): Added so could null out anchors inside - repeated groups. - (ANCHOR_LIST_PTR_FULL): Added for above type. - (compile_stack_element): Changed name from stack_element. - (compile_stack_type): Changed name from compile_stack. - (INIT_COMPILE_STACK_SIZE): Changed name from INIT_STACK_SIZE. - (COMPILE_STACK_EMPTY): Changed name from STACK_EMPTY. - (COMPILE_STACK_FULL): Changed name from STACK_FULL. - (regex_compile): Changed SYNTAX parameter to non-const. - Changed variable name `stack' to `compile_stack'. - If syntax bit RE_REPEATED_ANCHORS_AWAY is set, then naively put - anchors in a list when encounter them and then set them to - `unused' when detect they are within a group operated on by a - repetition operator. Need something more sophisticated than - this, as they should only get set to `unused' if they are in - positions where they would be anchors. Also need a better way to - detect contextually invalid anchors. - Changed some comments. - (is_in_compile_stack): Changed name from `is_in_stack'. - (extend_anchor_list): Added to do anchor stuff. - (record_anchor_position): Added to do anchor stuff. - (remove_intervening_anchors): Added to do anchor stuff.
- (re_match_2): Now match a newline with the match-any-character - operator if RE_DOT_MATCHES_NEWLINE is set. - Compacted some code. - (regcomp): Added new POSIX newline information to the header - comment. - If REG_NEWLINE cflag is set, then now unset RE_DOT_MATCHES_NEWLINE - in syntax. - (put_in_buffer): Added to do new POSIX regerror spec. Called - by regerror. - (regerror): Changed to take a pattern buffer, error buffer and - its size, and return type `size_t', the size of the full error - message, and the first ERRBUF_SIZE - 1 characters of the full - error message in the error buffer. - -Wed Feb 27 16:38:33 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (#include <sys/types.h>): Removed this as new POSIX - standard has the user include it. - (RE_SYNTAX_POSIX_BASIC and RE_SYNTAX_POSIX_EXTENDED): Removed - RE_HAT_LISTS_NOT_NEWLINE as new POSIX standard has the cflag - REG_NEWLINE now set this. Similarly, added syntax bit - RE_ANCHOR_NOT_NEWLINE as this is now unset by REG_NEWLINE. - (RE_SYNTAX_POSIX_BASIC): Removed syntax bit - RE_NO_CONSECUTIVE_REPEATS as POSIX now allows them. - - * regex.c (#include <sys/types.h>): Added this as new POSIX - standard has the user include it instead of us putting it in - regex.h. - (extern char *re_syntax_table): Made into an extern so the - user could allocate it. - (DO_RANGE): If don't find a range end, now goto invalid_range_end - instead of unmatched_left_bracket. - (regex_compile): Made variable SYNTAX non-const.???? - Reformatted some code. - (re_compile_fastmap): Moved is_a_succeed_n's declaration to - inner braces. - Compacted some code. - (SET_NEWLINE_FLAG): Removed and put inline. - (regcomp): Made variable `syntax' non-const so can unset - RE_ANCHOR_NOT_NEWLINE syntax bit if cflag RE_NEWLINE is set. - If cflag RE_NEWLINE is set, set the RE_HAT_LISTS_NOT_NEWLINE - syntax bit and unset RE_ANCHOR_NOT_NEWLINE one of `syntax'.
- -Wed Feb 20 16:33:38 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_CONSECUTIVE_REPEATS): Changed name from - RE_NO_CONSEC_REPEATS. - (REG_ENESTING): Deleted this POSIX return value, as the stack - is now unbounded. - (struct re_pattern_buffer): Changed some comments. - (re_compile_pattern): Changed a comment. - Deleted check on stack upper bound and corresponding error. - Now when there's no interval contents and it's the end of the - pattern, go to unmatched_left_curly_brace instead of end_of_pattern. - Removed nesting_too_deep error, as the stack is now unbounded. - (regcomp): Removed REG_ENESTING case, as the stack is now unbounded. - (regerror): Removed REG_ENESTING case, as the stack is now unbounded. - - * regex.c (MAX_STACK_SIZE): Deleted because don't need upper - bound on array indexed with an unsigned number. - -Sun Feb 17 15:50:24 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h: Changed and added some comments. - - * regex.c (init_syntax_once): Made `_' a word character. - (re_compile_pattern): Added a comment. - (re_match_2): Redid header comment. - (regexec): With header comment about PMATCH, corrected and - removed details found regex.h, adding a reference. - -Fri Feb 15 09:21:31 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (DO_RANGE): Removed argument parentheses. - Now get untranslated range start and end characters and set - list bits for the translated (if at all) versions of them and - all characters between them. - (re_match_2): Now use regs->num_regs instead of num_regs_wanted - wherever possible. - (regcomp): Now build case-fold translate table using isupper - and tolower facilities so will work on foreign language characters. - -Sat Feb 9 16:40:03 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_HAT_LISTS_NOT_NEWLINE): Changed syntax bit name - from RE_LISTS_NOT_NEWLINE as it only affects nonmatching lists. 
- Changed all references to the match-beginning-of-string - operator to match-beginning-of-line operator, as this is what - it does. - (RE_NO_CONSEC_REPEATS): Added this syntax bit. - (RE_SYNTAX_POSIX_BASIC): Added above bit to this. - (REG_PREMATURE_END): Changed name to REG_EEND. - (REG_EXCESS_NESTING): Changed name to REG_ENESTING. - (REG_TOO_BIG): Changed name to REG_ESIZE. - (REG_INVALID_PREV_RE): Deleted this return POSIX value. - Added and changed some comments. - - * regex.c (re_compile_pattern): Now sets the pattern buffer's - `return_default_num_regs' field. - (typedef struct stack_element, stack_type, INIT_STACK_SIZE, - MAX_STACK_SIZE, STACK_EMPTY, STACK_FULL): Added for regex_compile. - (INIT_BUF_SIZE): Changed value from 28 to 32. - (BUF_PUSH): Changed name from BUFPUSH. - (MAX_BUF_SIZE): Added so could use in many places. - (IS_CHAR_CLASS_STRING): Replaced is_char_class with this. - (regex_compile): Added a stack which could grow dynamically - and which has struct elements. - Go back to initializing `zero_times_ok' and `many_time_ok' to - 0 and |=ing them inside the loop. - Now disallow consecutive repetition operators if the syntax - bit RE_NO_CONSEC_REPEATS is set. - Now detect trailing backslash when the compiler is expecting a - `?' or a `+'. - Changed calls to GET_BUFFER_SPACE which asked for 6 to ask for - 3, as that's all they needed. - Now check for trailing backslash inside lists. - Now disallow an empty alternative right before an end-of-line - operator. - Now get buffer space before leaving space for a fixup jump. - Now check if at pattern end when at open-interval operator. - Added some comments. - Now check if non-interval repetition operators follow an - interval one if the syntax bit RE_NO_CONSEC_REPEATS is set. - Now only check if what precedes an interval repetition - operator isn't a regular expression which matches one - character if the syntax bit RE_NO_CONSEC_REPEATS is set. 
- Now return "Unmatched [ or [^" instead of "Unmatched [". - (is_in_stack): Added to check if a given register number is in - the stack. - (re_match_2): If initial variable allocations fail, return -2, - instead of -1. - Now set reg's `num_regs' field when allocating regs. - Now before allocating them, free regs->start and end if they - aren't NULL and return -2 if either allocation fails. - Now use regs->num_regs instead of num_regs_wanted to control - regs loops. - Now increment past the newline when matching it with an - end-of-line operator. - (recomp): Added to the header comment. - Now return REG_ESUBREG if regex_compile returns "Unmatched [ - or [^" instead of doing so if it returns "Unmatched [". - Now return REG_BADRPT if in addition to returning "Missing - preceding regular expression", regex_compile returns "Invalid - preceding regular expression". - Now return new return value names (see regex.h changes). - (regexec): Added to header comment. - Initialize regs structure. - Now match whole string. - Now always free regs.start and regs.end instead of just when - the string matched. - (regerror): Now return "Regex error: Unmatched [ or [^.\n" - instead of "Regex error: Unmatched [.\n". - Now return "Regex error: Preceding regular expression either - missing or not simple.\n" instead of "Regex error: Missing - preceding regular expression.\n". - Removed REG_INVALID_PREV_RE case (it got subsumed into the - REG_BADRPT case). - -Thu Jan 17 09:52:35 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h: Changed a comment. - - * regex.c: Changed and added large header comments. - (re_compile_pattern): Now if detect that `laststart' for an - interval points to a byte code for a regular expression which - matches more than one character, make it an internal error. - (regerror): Return error message, don't print it. - -Tue Jan 15 15:32:49 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (regcomp return codes): Added GNU ones. - Updated some comments. 
- - * regex.c (DO_RANGE): Changed `obscure_syntax' to `syntax'. - (regex_compile): Added `following_left_brace' to keep track of - where pseudo interval following a valid interval starts. - Changed some instances that returned "Invalid regular - expression" to instead return error strings coinciding with - POSIX error codes. - Changed some comments. - Now consider only things between `[:' and `:]' to be possible - character class names. - Now a character class expression can't end a pattern; at - least a `]' must close the list. - Now if the syntax bit RE_NO_BK_CURLY_BRACES is set, then a - valid interval must be followed by yet another to get an error - for preceding an interval (in this case, the second one) with - a regular expression that matches more than one character. - Now if what follows a valid interval begins with a open - interval operator but doesn't begin a valid interval, then set - following_left_bracket to it, put it in C and go to - normal_char label. - Added some comments. - Return "Invalid character class name" instead of "Invalid - character class". - (regerror): Return messages for all POSIX error codes except - REG_ECOLLATE and REG_NEWLINE, along with all GNU error codes. - Added `break's after all cases. - (main): Call re_set_syntax instead of setting `obscure_syntax' - directly. - -Sat Jan 12 13:37:59 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (Copyright): Updated date. - (#include <sys/types.h>): Include unconditionally. - (RE_CANNOT_MATCH_NEWLINE): Deleted this syntax bit. - (RE_SYNTAX_POSIX_BASIC, RE_SYNTAX_POSIX_EXTENDED): Removed - setting the RE_ANCHOR_NOT_NEWLINE syntax bit from these. - Changed and added some comments. - (struct re_pattern_buffer): Changed some flags from chars to bits. - Added field `syntax'; holds which syntax pattern was compiled with. - Added bit flag `return_default_num_regs'. - (externs for GNU and Berkeley UNIX routines): Added `const's to - parameter types to be compatible with POSIX. 
- (#define const): Added to support old C compilers. - - * regex.c (Copyright): Updated date. - (enum regexpcode): Deleted `newline'. - (regex_compile): Renamed re_compile_pattern to this, added a - syntax parameter so it can set the pattern buffer's `syntax' - field. - Made `pattern', and `size' `const's so could pass to POSIX - interface routines; also made `const' whatever interval - variables had to be to make this work. - Changed references to `obscure_syntax' to new parameter `syntax'. - Deleted putting `newline' in buffer when see `\n'. - Consider invalid character classes which have nothing wrong - except the character class name; if so, return character-class error. - (is_char_class): Added routine for regex_compile. - (re_compile_pattern): added a new one which calls - regex_compile with `obscure_syntax' as the actual parameter - for the formal `syntax'. - Gave this the old routine's header comments. - Made `pattern', and `size' `const's so could use POSIX interface - routine parameters. - (re_search, re_search_2, re_match, re_match_2): Changed - `pbufp' to `bufp'. - (re_search_2, re_match_2): Changed `mstop' to `stop'. - (re_search, re_search_2): Made all parameters except `regs' - `const's so could use POSIX interface routines parameters. - (re_search_2): Added private copies of `const' parameters so - could change their values. - (re_match_2): Made all parameters except `regs' `const's so - could use POSIX interface routines parameters. - Changed `size1' and `size2' parameters to `size1_arg' and - `size2_arg' and so could change; added local `size1' and - `size2' and set to these. - Added some comments. - Deleted `newline' case. - `begline' can also possibly match if `d' contains a newline; - if it does, we have to increment d to point past the newline. - Replaced references to `obscure_syntax' with `bufp->syntax'. - (re_comp, re_exec): Made parameter `s' a `const' so could use POSIX - interface routines parameters. 
- Now call regex_compile, passing `obscure_syntax' via the - `syntax' parameter. - (re_exec): Made local `len' a `const' so could pass to re_search. - (regcomp): Added header comment. - Added local `syntax' to set and pass to regex_compile rather - than setting global `obscure_syntax' and passing it. - Call regex_compile with its `syntax' parameter rather than - re_compile_pattern. - Return REG_ECTYPE if character-class error. - (regexec): Don't initialize `regs' to anything. - Made `private_preg' a nonpointer so could set to what the - constant `preg' points. - Initialize `private_preg's `return_default_num_regs' field to - zero because want to return `nmatch' registers, not however - many there are subexpressions in the pattern. - Also test if `nmatch' > 0 to see if should pass re_match `regs'. - -Tue Jan 8 15:57:17 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): Reworded comment. - - * regex.c (EXTEND_BUFFER): Also reset beg_interval. - (re_search_2): Return val if val = -2. - (NUM_REG_ITEMS): Listed items in comment. - (NUM_OTHER_ITEMS): Defined this for using in > 1 definition. - (MAX_NUM_FAILURE_ITEMS): Replaced `+ 2' with NUM_OTHER_ITEMS. - (NUM_FAILURE_ITEMS): As with definition above and added to - comment. - (PUSH_FAILURE_POINT): Replaced `* 2's with `<< 1's. - (re_match_2): Test with equality with 1 to see pbufp->bol and - pbufp->eol are set. - -Fri Jan 4 15:07:22 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): Reordered some fields. - Updated some comments. - Added not_bol and not_eol fields. - (extern regcomp, regexec, regerror): Added return types. - (extern regfree): Added `extern'. - - * regex.c (min): Deleted unused macro. - (re_match_2): Compacted some code. - Removed call to macro `min' from `for' loop. - Fixed so unused registers get filled with -1's. - Fail if the pattern buffer's `not_bol' field is set and - encounter a `begline'. 
- Fail if the pattern buffer's `not_eol' field is set and - encounter a `endline'. - Deleted redundant check for empty stack in fail case. - Don't free pattern buffer's components in re_comp. - (regexec): Initialize variable regs. - Added `private_preg' pattern buffer so could set `not_bol' and - `not_eol' fields and hand to re_match. - Deleted naive attempt to detect anchors. - Set private pattern buffer's `not_bol' and `not_eol' fields - according to eflags value. - `nmatch' must also be > 0 for us to bother allocating - registers to send to re_match and filling pmatch - with their results after the call to re_match. - Send private pattern buffer instead of argument to re_match. - If use the registers, always free them and then set them to NULL. - (regerror): Added this Posix routine. - (regfree): Added this Posix routine. - -Tue Jan 1 15:02:45 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NREGS): Deleted this definition, as now the user - can choose how many registers to have. - (REG_NOTBOL, REG_NOTEOL): Defined these Posix eflag bits. - (REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, - REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, - REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_ENEWLINE): - Defined these return values for Posix's regcomp and regexec. - Updated some comments. - (struct re_pattern_buffer): Now typedef this as regex_t - instead of the other way around. - (struct re_registers): Added num_regs field. Made start and - end fields pointers to char instead of fixed size arrays. - (regmatch_t): Added this Posix register type. - (regcomp, regexec, regerror, regfree): Added externs for these - Posix routines. - - * regex.c (enum boolean): Typedefed this. - (re_pattern_buffer): Reformatted some comments. - (re_compile_pattern): Updated some comments. 
- Always push start_memory and its attendant number whenever - encounter a group, not just when its number is less than the - previous maximum number of registers; same for stop_memory. - Get 4 bytes of buffer space instead of 2 when pushing a - set_number_at. - (can_match_nothing): Added this to elaborate on and replace - code in re_match_2. - (reg_info_type): Made can_match_nothing field a bit instead of int. - (MIN): Added for re_match_2. - (re_match_2 macros): Changed all `for' loops which used - RE_NREGS to now use num_internal_regs as upper bounds. - (MAX_NUM_FAILURE_ITEMS): Use num_internal_regs instead of RE_NREGS. - (POP_FAILURE_POINT): Added check for empty stack. - (FREE_VARIABLES): Added this to free (and set to NULL) - variables allocated in re_match_2. - (re_match_2): Rearranged parameters to be in order. - Added variables num_regs_wanted (how many registers the user wants) - and num_internal_regs (how many groups there are). - Allocated initial_stack, regstart, regend, old_regstart, - old_regend, reginfo, best_regstart, and best_regend---all - which used to be fixed size arrays. Free them all and return - -1 if any fail. - Free above variables if starting position pos isn't valid. - Changed all `for' loops which used RE_NREGS to now use - num_internal_regs as upper bounds---except for the loops which - fill regs; then use num_regs_wanted. - Allocate regs if the user has passed it and wants more than 0 - registers filled. - Set regs->start[i] and regs->end[i] to -1 if either - regstart[i] or regend[i] equals -1, not just the first. - Free allocated variables before returning. - Updated some comments. - (regcomp): Return REG_ESPACE, REG_BADPAT, REG_EPAREN when - appropriate. - Free translate array. - (regexec): Added this Posix interface routine. - -Mon Dec 24 14:21:13 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: If _POSIX_SOURCE is defined then #include <sys/types.h>. - Added syntax bit RE_CANNOT_MATCH_NEWLINE. 
- Defined Posix cflags: REG_EXTENDED, REG_NEWLINE, REG_ICASE, and - REG_NOSUB. - Added fields re_nsub and no_sub to struct re_pattern_buffer. - Typedefed regex_t to be `struct re_pattern_buffer'. - - * regex.c (CHAR_SET_SIZE): Defined this to be 256 and replaced - incidences of this value with this constant. - (re_compile_pattern): Added switch case for `\n' and put - `newline' into the pattern buffer when encounter this. - Increment the pattern_buffer's `re_nsub' field whenever open a - group. - (re_match_2): Match a newline with `newline'---provided the - syntax bit RE_CANNOT_MATCH_NEWLINE isn't set. - (regcomp): Added this Posix interface routine. - (enum test_type): Added interface_test tag. - (main): Added Posix interface test. - -Tue Dec 18 12:58:12 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): reformatted so would fit - in texinfo documentation. - -Thu Nov 29 15:49:16 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_EMPTY_ALTS): Added this bit. - (RE_SYNTAX_POSIX_EXTENDED): Added above bit. - - * regex.c (re_compile_pattern): Disallow empty alternatives only - when RE_NO_EMPTY_ALTS is set, not when RE_CONTEXTUAL_INVALID_OPS is. - Changed RE_NO_BK_CURLY_BRACES to RE_NO_BK_PARENS when testing - for empty groups at label handle_open. - At label handle_bar: disallow empty alternatives if RE_NO_EMPTY_ALTS - is set. - Rewrote some comments. - - (re_compile_fastmap): cleaned up code. - - (re_search_2): Rewrote comment. - - (struct register_info): Added field `inner_groups'; it records - which groups are inside of the current one. - Added field can_match_nothing; it's set if the current group - can match nothing. - Added field ever_match_something; it's set if current group - ever matched something. - - (INNER_GROUPS): Added macro to access inner_groups field of - struct register_info. - - (CAN_MATCH_NOTHING): Added macro to access can_match_nothing - field of struct register_info. 
- - (EVER_MATCHED_SOMETHING): Added macro to access - ever_matched_something field of struct register_info. - - (NOTE_INNER_GROUP): Defined macro to record that a given group - is inside of all currently active groups. - - (re_match_2): Added variables *p1 and mcnt2 (multipurpose). - Added old_regstart and old_regend arrays to hold previous - register values if they need be restored. - Initialize added fields and variables. - case start_memory: Find out if the group can match nothing. - Save previous register values in old_restart and old_regend. - Record that current group is inside of all currently active - groups. - If the group is inside a loop and it ever matched anything, - restore its registers to values before the last failed match. - Restore the registers for the inner groups, too. - case duplicate: Can back reference to a group that never - matched if it can match nothing. - -Thu Nov 29 11:12:54 1990 Karl Berry (karl at hayley) - - * regex.c (bcopy, ...): define these if either _POSIX_SOURCE or - STDC_HEADERS is defined; same for including <stdlib.h>. - -Sat Oct 6 16:04:55 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): Changed field comments. - - * regex.c (re_compile_pattern): Allow a `$' to precede an - alternation operator (`|' or `\|'). - Disallow `^' and/or `$' in empty groups if the syntax bit - RE_NO_EMPTY_GROUPS is set. - Wait until have parsed a valid `\{...\}' interval expression - before testing RE_CONTEXTUAL_INVALID_OPS to see if it's - invalidated by that. - Don't use RE_NO_BK_CURLY_BRACES to test whether or not a validly - parsed interval expression is invalid if it has no preceding re; - rather, use RE_CONTEXTUAL_INVALID_OPS. 
- If an interval parses, but there is no preceding regular - expression, yet the syntax bit RE_CONTEXTUAL_INDEP_OPS is set, - then that interval can match the empty regular expression; if - the bit isn't set, then the characters in the interval - expression are parsed as themselves (sans the backslashes). - In unfetch_interval case: Moved PATFETCH to above the test for - RE_NO_BK_CURLY_BRACES being set, which would force a goto - normal_backslash; the code at both normal_backsl and normal_char - expect a character in `c.' - -Sun Sep 30 11:13:48 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Changed some comments to use the terms used in the - documentation. - (RE_CONTEXTUAL_INDEP_OPS): Changed name from `RE_CONTEXT_INDEP_OPS'. - (RE_LISTS_NOT_NEWLINE): Changed name from `RE_HAT_NOT_NEWLINE.' - (RE_ANCHOR_NOT_NEWLINE): Added this syntax bit. - (RE_NO_EMPTY_GROUPS): Added this syntax bit. - (RE_NO_HYPHEN_RANGE_END): Deleted this syntax bit. - (RE_SYNTAX_...): Reformatted. - (RE_SYNTAX_POSIX_BASIC, RE_SYNTAX_EXTENDED): Added syntax bits - RE_ANCHOR_NOT_NEWLINE and RE_NO_EMPTY_GROUPS, and deleted - RE_NO_HYPHEN_RANGE_END. - (RE_SYNTAX_POSIX_EXTENDED): Added syntax bit RE_DOT_NOT_NULL. - - * regex.c (bcopy, bcmp, bzero): Define if _POSIX_SOURCE is defined. - (_POSIX_SOURCE): ifdef this, #include <stdlib.h> - (#ifdef emacs): Changed comment of the #endif for the its #else - clause to be `not emacs', not `emacs.' - (no_pop_jump): Changed name from `jump'. - (pop_failure_jump): Changed name from `finalize_jump.' - (maybe_pop_failure_jump): Changed name from `maybe_finalize_jump'. - (no_pop_jump_n): Changed name from `jump_n.' - (EXTEND_BUFFER): Use shift instead of multiplication to double - buf->allocated. - (DO_RANGE, recompile_pattern): Added macro to set the list bits - for a range. - (re_compile_pattern): Fixed grammar problems in some comments. - Checked that RE_NO_BK_VBAR is set to make `$' valid before a `|' - and not set to make it valid before a `\|'. 
- Checked that RE_NO_BK_PARENS is set to make `$' valid before a ')' - and not set to make it valid before a `\)'. - Disallow ranges starting with `-', unless the range is the - first item in a list, rather than disallowing ranges which end - with `-'. - Disallow empty groups if the syntax bit RE_NO_EMPTY_GROUPS is set. - Disallow nothing preceding `{' and `\{' if they represent the - open-interval operator and RE_CONTEXTUAL_INVALID_OPS is set. - (register_info_type): typedef-ed this using `struct register_info.' - (SET_REGS_MATCHED): Compacted the code. - (re_match_2): Made it fail if back reference a group which we've - never matched. - Made `^' not match a newline if the syntax bit - RE_ANCHOR_NOT_NEWLINE is set. - (really_fail): Added this label so could force a final fail that - would not try to use the failure stack to recover. - -Sat Aug 25 14:23:01 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_CONTEXTUAL_OPS): Changed name from RE_CONTEXT_OPS. - (global): Rewrote comments and rebroke some syntax #define lines. - - * regex.c (isgraph): Added definition for sequents. - (global): Now refer to character set lists as ``lists.'' - Rewrote comments containing ``\('' or ``\)'' to now refer to - ``groups.'' - (RE_CONTEXTUAL_OPS): Changed name from RE_CONTEXT_OPS. - - (re_compile_pattern): Expanded header comment. - -Sun Jul 15 14:50:25 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_CONTEX_INDEP_OPS): the comment's sense got turned - around when we changed how it read; changed it to be correct. - -Sat Jul 14 16:38:06 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_EMPTY_BK_REF): changed name to - RE_NO_MISSING_BK_REF, as this describes it better. - - * regex.c (re_compile_pattern): changed RE_NO_EMPTY_BK_REF - to RE_NO_MISSING_BK_REF, as above. 
- -Thu Jul 12 11:45:05 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_EMPTY_BRACKETS): removed this syntax bit, as - bracket expressions should *never* be empty regardless of the - syntax. Removes this bit from RE_SYNTAX_POSIX_BASIC and - RE_SYNTAX_POSIX_EXTENDED. - - * regex.c (SET_LIST_BIT): in the comment, now refer to character - sets as (non)matching sets, as bracket expressions can now match - other things in addition to characters. - (re_compile_pattern): refer to groups as such instead of `\(...\)' - or somesuch, because groups can now be enclosed in either plain - parens or backslashed ones, depending on the syntax. - In the '[' case, added a boolean just_had_a_char_class to detect - whether or not a character class begins a range (which is invalid). - Restore way of breaking out of a bracket expression to original way. - Add way to detect a range if the last thing in a bracket - expression was a character class. - Took out check for c != ']' at the end of a character class in - the else clause, as it had already been checked in the if part - that also checked the validity of the string. - Set or clear just_had_a_char_class as appropriate. - Added some comments. Changed references to character sets to - ``(non)matching lists.'' - -Sun Jul 1 12:11:29 1990 Karl Berry (karl at hayley) - - * regex.h (BYTEWIDTH): moved back to regex.c. - - * regex.h (re_compile_fastmap): removed declaration; this - shouldn't be advertised. - -Mon May 28 15:27:53 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (ifndef Sword): Made comments more specific. - (global): include <stdio.h> so can write fatal messages on - standard error. Replaced calls to assert with fprintfs to - stderr and exit (1)'s. - (PREFETCH): Reformatted to make more readable. - (AT_STRINGS_BEG): Defined to test if we're at the beginning of - the virtual concatenation of string1 and string2. 
- (AT_STRINGS_END): Defined to test if at the end of the virtual - concatenation of string1 and string2. - (AT_WORD_BOUNDARY): Defined to test if are at a word boundary. - (IS_A_LETTER(d)): Defined to test if the contents of the pointer D - is a letter. - (re_match_2): Rewrote the wordbound, notwordbound, wordbeg, wordend, - begbuf, and endbuf cases in terms of the above four new macros. - Called SET_REGS_MATCHED in the matchsyntax, matchnotsyntax, - wordchar, and notwordchar cases. - -Mon May 14 14:49:13 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (re_search_2): Fixed RANGE to not ever take STARTPOS - outside of virtual concatenation of STRING1 and STRING2. - Updated header comment as to this. - (re_match_2): Clarified comment about MSTOP in header. - -Sat May 12 15:39:00 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (re_search_2): Checked for out-of-range STARTPOS. - Added comments. - When searching backwards, not only get the character with which - to compare to the fastmap from string2 if the starting position - >= size1, but also if size1 is zero; this is so won't get a - segmentation fault if string1 is null. - Reformatted code at label advance. - -Thu Apr 12 20:26:21 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Added #pragma once and #ifdef...endif __REGEXP_LIBRARY. - (RE_EXACTN_VALUE): Added for search.c to use. - Reworded some comments. - - regex.c: Punctuated some comments correctly. - (NULL): Removed this. - (RE_EXACTN_VALUE): Added for search.c to use. - (<ctype.h>): Moved this include to top of file. - (<assert.h>): Added this include. - (struct regexpcode): Assigned 0 to unused and 1 to exactn - because of RE_EXACTN_VALUE. - Added comment. - (various macros): Lined up backslashes near end of line. - (insert_jump): Cleaned up the header comment. - (re_search): Corrected the header comment. - (re_search_2): Cleaned up and completed the header comment. - (re_max_failures): Updated comment. 
- (struct register_info): Constructed as bits so as to save space - on the stack when pushing register information. - (IS_ACTIVE): Macro for struct register_info. - (MATCHED_SOMETHING): Macro for struct register_info. - (NUM_REG_ITEMS): How many register information items for each - register we have to push on the stack at each failure. - (MAX_NUM_FAILURE_ITEMS): If push all the registers on failure, - this is how many items we push on the stack. - (PUSH_FAILURE_POINT): Now pushes whether or not the register is - currently active, and whether or not it matched something. - Checks that there's enough space allocated to accommodate all the - items we currently want to push. (Before, a test for an empty - stack sufficed because we always pushed and popped the same - number of items). - Replaced ``2'' with MAX_NUM_FAILURE_POINTS when ``2'' refers - to how many things get pushed on the stack each time. - When copy the stack into the newly allocated storage, now only copy - the area in use. - Clarified comment. - (POP_FAILURE_POINT): Defined to use in places where put number - of registers on the stack into a variable before using it to - decrement the stack, so as to not confuse the compiler. - (IS_IN_FIRST_STRING): Defined to check if a pointer points into - the first string. - (SET_REGS_MATCHED): Changed to use the struct register_info - bits; also set the matched-something bit to false if the - register isn't currently active. (This is a redundant setting.) - (re_match_2): Cleaned up and completed the header comment. - Updated the failure stack comment. - Replaced the ``2'' with MAX_NUM_FAILURE_ITEMS in the static - allocation of initial_stack, because now more than two (now up - to MAX_FAILURE_ITEMS) items get pushed on the failure stack each - time. - Ditto for stackb. 
- Trashed restart_seg1, regend_seg1, best_regstart_seg1, and - best_regend_seg1 because they could have erroneous information - in them, such as when matching ``a'' (in string1) and ``ab'' (in - string2) with ``(a)*ab''; before using IS_IN_FIRST_STRING to see - whether or not the register starts or ends in string1, - regstart[1] pointed past the end of string1, yet regstart_seg1 - was 0! - Added variable reg_info of type struct register_info to keep - track of currently active registers and whether or not they - currently match anything. - Commented best_regs_set. - Trashed reg_active and reg_matched_something and put the - information they held into reg_info; saves space on the stack. - Replaced NULL with '\000'. - In begline case, compacted the code. - Used assert to exit if had an internal error. - In begbuf case, because now force the string we're working on - into string2 if there aren't two strings, now allow d == string2 - if there is no string1 (and the check for that is size1 == 0!); - also now succeeds if there aren't any strings at all. - (main, ifdef canned): Put test type into a variable so could - change it while debugging. - -Sat Mar 24 12:24:13 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (GET_UNSIGNED_NUMBER): Deleted references to num_fetches. - (re_compile_pattern): Deleted num_fetches because could keep - track of the number of fetches done by saving a pointer into the - pattern. - Added variable beg_interval to be used as a pointer, as above. - Assert that beg_interval points to something when it's used as above. - Initialize succeed_n's to lower_bound because re_compile_fastmap - needs to know it. - (re_compile_fastmap): Deleted unnecessary variable is_a_jump_n. - Added comment. - (re_match_2): Put number of registers on the stack into a - variable before using it to decrement the stack, so as to not - confuse the compiler. - Updated comments. - Used error routine instead of printf and exit. 
- In exactn case, restored longer code from ``original'' regex.c - which doesn't test translate inside a loop. - - * regex.h: Moved #define NULL and the enum regexpcode definition - and to regex.c. Changed some comments. - - regex.c (global): Updated comments about compiling and for the - re_compile_pattern jump routines. - Added #define NULL and the enum regexpcode definition (from - regex.h). - (enum regexpcode): Added set_number_at to reset the n's of - succeed_n's and jump_n's. - (re_set_syntax): Updated its comment. - (re_compile_pattern): Moved its heading comment to after its macros. - Moved its include statement to the top of the file. - Commented or added to comments of its macros. - In start_memory case: Push laststart value before adding - start_memory and its register number to the buffer, as they - might not get added. - Added code to put a set_number_at before each succeed_n and one - after each jump_n; rewrote code in what seemed a more - straightforward manner to put all these things in the pattern so - the succeed_n's would correctly jump to the set_number_at's of - the matching jump_n's, and so the jump_n's would correctly jump - to after the set_number_at's of the matching succeed_n's. - Initialize succeed_n n's to -1. - (insert_op_2): Added this to insert an operation followed by - two integers. - (re_compile_fastmap): Added set_number_at case. - (re_match_2): Moved heading comment to after macros. - Added mention of REGS to heading comment. - No longer turn a succeed_n with n = 0 into an on_failure_jump, - because n needs to be reset each time through a loop. - Check to see if a succeed_n's n is set by its set_number_at. - Added set_number_at case. - Updated some comments. - (main): Added another main to run posix tests, which is compiled - ifdef both test and canned. (Old main is still compiled ifdef - test only). 
- -Tue Mar 19 09:22:55 1990 Kathy Hargreaves (kathy at hayley) - - * regex.[hc]: Change all instances of the word ``legal'' to - ``valid'' and all instances of ``illegal'' to ``invalid.'' - -Sun Mar 4 12:11:31 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Added syntax bit RE_NO_EMPTY_RANGES which is set if - an ending range point has to collate higher or equal to the - starting range point. - Added syntax bit RE_NO_HYPHEN_RANGE_END which is set if a hyphen - can't be an ending range point. - Set to two above bits in RE_SYNTAX_POSIX_BASIC and - RE_SYNTAX_POSIX_EXTENDED. - - regex.c: (re_compile_pattern): Don't allow empty ranges if the - RE_NO_EMPTY_RANGES syntax bit is set. - Don't let a hyphen be a range end if the RE_NO_HYPHEN_RANGE_END - syntax bit is set. - (ESTACK_PUSH_2): renamed this PUSH_FAILURE_POINT and made it - push all the used registers on the stack, as well as the number - of the highest numbered register used, and (as before) the two - failure points. - (re_match_2): Fixed up comments. - Added arrays best_regstart[], best_regstart_seg1[], best_regend[], - and best_regend_seg1[] to keep track of the best match so far - whenever reach the end of the pattern but not the end of the - string, and there are still failure points on the stack with - which to backtrack; if so, do the saving and force a fail. - If reach the end of the pattern but not the end of the string, - but there are no more failure points to try, restore the best - match so far, set the registers and return. - Compacted some code. - In stop_memory case, if the subexpression we've just left is in - a loop, push onto the stack the loop's on_failure_jump failure - point along with the current pointer into the string (d). - In finalize_jump case, in addition to popping the failure - points, pop the saved registers. - In the fail case, restore the registers, as well as the failure - points. 
- -Sun Feb 18 15:08:10 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c: (global): Defined a macro GET_BUFFER_SPACE which - makes sure you have a specified number of buffer bytes - allocated. - Redefined the macro BUFPUSH to use this. - Added comments. - - (re_compile_pattern): Call GET_BUFFER_SPACE before storing or - inserting any jumps. - - (re_match_2): Set d to string1 + pos and dend to end_match_1 - only if string1 isn't null. - Force exit from a loop if it's around empty parentheses. - In stop_memory case, if found some jumps, increment p2 before - extracting address to which to jump. Also, don't need to know - how many more times can jump_n. - In begline case, d must equal string1 or string2, in that order, - only if they are not null. - In maybe_finalize_jump case, skip over start_memorys' and - stop_memorys' register numbers, too. - -Thu Feb 15 15:53:55 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (BUFPUSH): off by one goof in deciding whether to - EXTEND_BUFFER. - -Wed Jan 24 17:07:46 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Moved definition of NULL to here. - Got rid of ``In other words...'' comment. - Added to some comments. - - regex.c: (re_compile_pattern): Tried to bulletproof some code, - i.e., checked if backward references (e.g., p[-1]) were within - the range of pattern. - - (re_compile_fastmap): Fixed a bug in succeed_n part where was - getting the amount to jump instead of how many times to jump. - - (re_search_2): Changed the name of the variable ``total'' to - ``total_size.'' - Condensed some code. - - (re_match_2): Moved the comment about duplicate from above the - start_memory case to above duplicate case. - - (global): Rewrote some comments. - Added commandline arguments to testing. - -Wed Jan 17 11:47:27 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c: (global): Defined a macro STORE_NUMBER which stores a - number into two contiguous bytes. 
Also defined STORE_NUMBER_AND_INCR - which does the same thing and then increments the pointer to the - storage place to point after the number. - Defined a macro EXTRACT_NUMBER which extracts a number from two - contiguous bytes. Also defined EXTRACT_NUMBER_AND_INCR which - does the same thing and then increments the pointer to the - source to point to after where the number was. - -Tue Jan 16 12:09:19 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Incorporated rms' changes. - Defined RE_NO_BK_REFS syntax bit which is set when want to - interpret back reference patterns as literals. - Defined RE_NO_EMPTY_BRACKETS syntax bit which is set when want - empty bracket expressions to be illegal. - Defined RE_CONTEXTUAL_ILLEGAL_OPS syntax bit which is set when want - it to be illegal for *, +, ? and { to be first in an re or come - immediately after a | or a (, and for ^ not to appear in a - nonleading position and $ in a nontrailing position (outside of - bracket expressions, that is). - Defined RE_LIMITED_OPS syntax bit which is set when want +, ? - and | to always be literals instead of ops. - Fixed up the Posix syntax. - Changed the syntax bit comments from saying, e.g., ``0 means...'' - to ``If this bit is set, it means...''. - Changed the syntax bit defines to use shifts instead of integers. - - * regex.c: (global): Incorporated rms' changes. - - (re_compile_pattern): Incorporated rms' changes - Made it illegal for a $ to appear anywhere but inside a bracket - expression or at the end of an re when RE_CONTEXTUAL_ILLEGAL_OPS - is set. Made the same hold for ^ except it has to be at the - beginning of an re instead of the end. - Made the re "[]" illegal if RE_NO_EMPTY_BRACKETS is set. - Made it illegal for | to be first or last in an re, or immediately - follow another | or a (. - Added and embellished some comments. - Allowed \{ to be interpreted as a literal if RE_NO_BK_CURLY_BRACES - is set. 
- Made it illegal for *, +, ?, and { to appear first in an re, or - immediately follow a | or a ( when RE_CONTEXTUAL_ILLEGAL_OPS is set. - Made back references interpreted as literals if RE_NO_BK_REFS is set. - Made recursive intervals either illegal (if RE_NO_BK_CURLY_BRACES - isn't set) or interpreted as literals (if is set), if RE_INTERVALS - is set. - Made it treat +, ? and | as literals if RE_LIMITED_OPS is set. - Cleaned up some code. - -Thu Dec 21 15:31:32 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c: (global): Moved RE_DUP_MAX to regex.h and made it - equal 2^15 - 1 instead of 1000. - Defined NULL to be zero. - Moved the definition of BYTEWIDTH to regex.h. - Made the global variable obscure_syntax nonstatic so the tests in - another file could use it. - - (re_compile_pattern): Defined a maximum length (CHAR_CLASS_MAX_LENGTH) - for character class strings (i.e., what's between the [: and the - :]'s). - Defined a macro SET_LIST_BIT(c) which sets the bit for C in a - character set list. - Took out comments that EXTEND_BUFFER clobbers C. - Made the string "^" match itself, if not RE_CONTEXT_IND_OPS. - Added character classes to bracket expressions. - Change the laststart pointer saved with the start of each - subexpression to point to start_memory instead of after the - following register number. This is because the subexpression - might be in a loop. - Added comments and compacted some code. - Made intervals only work if preceded by an re matching a single - character or a subexpression. - Made back references to nonexistent subexpressions illegal if - using POSIX syntax. - Made intervals work on the last preceding character of a - concatenation of characters, e.g., ab{0,} matches abbb, not abab. - Moved macro PREFETCH to outside the routine. - - (re_compile_fastmap): Added succeed_n to work analogously to - on_failure_jump if n is zero and jump_n to work analogously to - the other backward jumps. 
- - (re_match_2): Defined macro SET_REGS_MATCHED to set which - current subexpressions had matches within them. - Changed some comments. - Added reg_active and reg_matched_something arrays to keep track - of in which subexpressions currently have matched something. - Defined MATCHING_IN_FIRST_STRING and replaced ``dend == end_match_1'' - with it to make code easier to understand. - Fixed so can apply * and intervals to arbitrarily nested - subexpressions. (Lots of previous bugs here.) - Changed so won't match a newline if syntax bit RE_DOT_NOT_NULL is set. - Made the upcase array nonstatic so the testing file could use it also. - - (main.c): Moved the tests out to another file. - - (tests.c): Moved all the testing stuff here. - -Sat Nov 18 19:30:30 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c: (re_compile_pattern): Defined RE_DUP_MAX, the maximum - number of times an interval can match a pattern. - Added macro GET_UNSIGNED_NUMBER (used to get below): - Added variables lower_bound and upper_bound for upper and lower - bounds of intervals. - Added variable num_fetches so intervals could do backtracking. - Added code to handle '{' and "\{" and intervals. - Added to comments. - - (store_jump_n): (Added) Stores a jump with a number following the - relative address (for intervals). - - (insert_jump_n): (Added) Inserts a jump_n. - - (re_match_2): Defined a macro ESTACK_PUSH_2 for the error stack; - it checks for overflow and reallocates if necessary. - - * regex.h: Added bits (RE_INTERVALS and RE_NO_BK_CURLY_BRACES) - to obscure syntax to indicate whether or not - a syntax handles intervals and recognizes either \{ and - \} or { and } as operators. Also added two syntaxes - RE_SYNTAX_POSIX_BASIC and RE_POSIX_EXTENDED and two command codes - to the enumeration regexpcode; they are succeed_n and jump_n. 
- -Sat Nov 18 19:30:30 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c: (re_compile_pattern): Defined INIT_BUFF_SIZE to get rid - of repeated constants in code. Tested with value 1. - Renamed PATPUSH as BUFPUSH, since it pushes things onto the - buffer, not the pattern. Also made this macro extend the buffer - if it's full (so could do the following): - Took out code at top of loop that checks to see if buffer is going - to be full after 10 additions (and reallocates if necessary). - - (insert_jump): Rearranged declaration lines so comments would read - better. - - (re_match_2): Compacted exactn code and added more comments. - - (main): Defined macros TEST_MATCH and MATCH_SELF to do - testing; took out loop so could use these instead. - -Tue Oct 24 20:57:18 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c (re_set_syntax): Gave argument `syntax' a type. - (store_jump, insert_jump): made them void functions. - -Local Variables: -mode: indented-text -left-margin: 8 -version-control: never -End: diff --git a/regex-0.12/doc/regex.aux b/regex-0.12/doc/regex.aux @@ -1,136 +0,0 @@ -'xrdef {Overview-pg}{1} -'xrdef {Overview-snt}{Chapter'tie1} -'xrdef {Regular Expression Syntax-pg}{2} -'xrdef {Regular Expression Syntax-snt}{Chapter'tie2} -'xrdef {Syntax Bits-pg}{2} -'xrdef {Syntax Bits-snt}{Section'tie2.1} -'xrdef {Predefined Syntaxes-pg}{5} -'xrdef {Predefined Syntaxes-snt}{Section'tie2.2} -'xrdef {Collating Elements vs. Characters-pg}{6} -'xrdef {Collating Elements vs. 
Characters-snt}{Section'tie2.3} -'xrdef {The Backslash Character-pg}{7} -'xrdef {The Backslash Character-snt}{Section'tie2.4} -'xrdef {Common Operators-pg}{9} -'xrdef {Common Operators-snt}{Chapter'tie3} -'xrdef {Match-self Operator-pg}{9} -'xrdef {Match-self Operator-snt}{Section'tie3.1} -'xrdef {Match-any-character Operator-pg}{9} -'xrdef {Match-any-character Operator-snt}{Section'tie3.2} -'xrdef {Concatenation Operator-pg}{10} -'xrdef {Concatenation Operator-snt}{Section'tie3.3} -'xrdef {Repetition Operators-pg}{10} -'xrdef {Repetition Operators-snt}{Section'tie3.4} -'xrdef {Match-zero-or-more Operator-pg}{10} -'xrdef {Match-zero-or-more Operator-snt}{Section'tie3.4.1} -'xrdef {Match-one-or-more Operator-pg}{11} -'xrdef {Match-one-or-more Operator-snt}{Section'tie3.4.2} -'xrdef {Match-zero-or-one Operator-pg}{11} -'xrdef {Match-zero-or-one Operator-snt}{Section'tie3.4.3} -'xrdef {Interval Operators-pg}{12} -'xrdef {Interval Operators-snt}{Section'tie3.4.4} -'xrdef {Alternation Operator-pg}{13} -'xrdef {Alternation Operator-snt}{Section'tie3.5} -'xrdef {List Operators-pg}{13} -'xrdef {List Operators-snt}{Section'tie3.6} -'xrdef {Character Class Operators-pg}{14} -'xrdef {Character Class Operators-snt}{Section'tie3.6.1} -'xrdef {Range Operator-pg}{15} -'xrdef {Range Operator-snt}{Section'tie3.6.2} -'xrdef {Grouping Operators-pg}{16} -'xrdef {Grouping Operators-snt}{Section'tie3.7} -'xrdef {Back-reference Operator-pg}{17} -'xrdef {Back-reference Operator-snt}{Section'tie3.8} -'xrdef {Anchoring Operators-pg}{18} -'xrdef {Anchoring Operators-snt}{Section'tie3.9} -'xrdef {Match-beginning-of-line Operator-pg}{18} -'xrdef {Match-beginning-of-line Operator-snt}{Section'tie3.9.1} -'xrdef {Match-end-of-line Operator-pg}{18} -'xrdef {Match-end-of-line Operator-snt}{Section'tie3.9.2} -'xrdef {GNU Operators-pg}{20} -'xrdef {GNU Operators-snt}{Chapter'tie4} -'xrdef {Word Operators-pg}{20} -'xrdef {Word Operators-snt}{Section'tie4.1} -'xrdef {Non-Emacs Syntax Tables-pg}{20} 
-'xrdef {Non-Emacs Syntax Tables-snt}{Section'tie4.1.1} -'xrdef {Match-word-boundary Operator-pg}{20} -'xrdef {Match-word-boundary Operator-snt}{Section'tie4.1.2} -'xrdef {Match-within-word Operator-pg}{20} -'xrdef {Match-within-word Operator-snt}{Section'tie4.1.3} -'xrdef {Match-beginning-of-word Operator-pg}{21} -'xrdef {Match-beginning-of-word Operator-snt}{Section'tie4.1.4} -'xrdef {Match-end-of-word Operator-pg}{21} -'xrdef {Match-end-of-word Operator-snt}{Section'tie4.1.5} -'xrdef {Match-word-constituent Operator-pg}{21} -'xrdef {Match-word-constituent Operator-snt}{Section'tie4.1.6} -'xrdef {Match-non-word-constituent Operator-pg}{21} -'xrdef {Match-non-word-constituent Operator-snt}{Section'tie4.1.7} -'xrdef {Buffer Operators-pg}{21} -'xrdef {Buffer Operators-snt}{Section'tie4.2} -'xrdef {Match-beginning-of-buffer Operator-pg}{21} -'xrdef {Match-beginning-of-buffer Operator-snt}{Section'tie4.2.1} -'xrdef {Match-end-of-buffer Operator-pg}{21} -'xrdef {Match-end-of-buffer Operator-snt}{Section'tie4.2.2} -'xrdef {GNU Emacs Operators-pg}{22} -'xrdef {GNU Emacs Operators-snt}{Chapter'tie5} -'xrdef {Syntactic Class Operators-pg}{22} -'xrdef {Syntactic Class Operators-snt}{Section'tie5.1} -'xrdef {Emacs Syntax Tables-pg}{22} -'xrdef {Emacs Syntax Tables-snt}{Section'tie5.1.1} -'xrdef {Match-syntactic-class Operator-pg}{22} -'xrdef {Match-syntactic-class Operator-snt}{Section'tie5.1.2} -'xrdef {Match-not-syntactic-class Operator-pg}{22} -'xrdef {Match-not-syntactic-class Operator-snt}{Section'tie5.1.3} -'xrdef {What Gets Matched?-pg}{23} -'xrdef {What Gets Matched?-snt}{Chapter'tie6} -'xrdef {Programming with Regex-pg}{24} -'xrdef {Programming with Regex-snt}{Chapter'tie7} -'xrdef {GNU Regex Functions-pg}{24} -'xrdef {GNU Regex Functions-snt}{Section'tie7.1} -'xrdef {GNU Pattern Buffers-pg}{24} -'xrdef {GNU Pattern Buffers-snt}{Section'tie7.1.1} -'xrdef {GNU Regular Expression Compiling-pg}{26} -'xrdef {GNU Regular Expression Compiling-snt}{Section'tie7.1.2} 
-'xrdef {GNU Matching-pg}{27} -'xrdef {GNU Matching-snt}{Section'tie7.1.3} -'xrdef {GNU Searching-pg}{28} -'xrdef {GNU Searching-snt}{Section'tie7.1.4} -'xrdef {Matching/Searching with Split Data-pg}{29} -'xrdef {Matching/Searching with Split Data-snt}{Section'tie7.1.5} -'xrdef {Searching with Fastmaps-pg}{30} -'xrdef {Searching with Fastmaps-snt}{Section'tie7.1.6} -'xrdef {GNU Translate Tables-pg}{31} -'xrdef {GNU Translate Tables-snt}{Section'tie7.1.7} -'xrdef {Using Registers-pg}{32} -'xrdef {Using Registers-snt}{Section'tie7.1.8} -'xrdef {Freeing GNU Pattern Buffers-pg}{34} -'xrdef {Freeing GNU Pattern Buffers-snt}{Section'tie7.1.9} -'xrdef {POSIX Regex Functions-pg}{35} -'xrdef {POSIX Regex Functions-snt}{Section'tie7.2} -'xrdef {POSIX Pattern Buffers-pg}{35} -'xrdef {POSIX Pattern Buffers-snt}{Section'tie7.2.1} -'xrdef {POSIX Regular Expression Compiling-pg}{35} -'xrdef {POSIX Regular Expression Compiling-snt}{Section'tie7.2.2} -'xrdef {POSIX Matching-pg}{37} -'xrdef {POSIX Matching-snt}{Section'tie7.2.3} -'xrdef {Reporting Errors-pg}{38} -'xrdef {Reporting Errors-snt}{Section'tie7.2.4} -'xrdef {Using Byte Offsets-pg}{39} -'xrdef {Using Byte Offsets-snt}{Section'tie7.2.5} -'xrdef {Freeing POSIX Pattern Buffers-pg}{39} -'xrdef {Freeing POSIX Pattern Buffers-snt}{Section'tie7.2.6} -'xrdef {BSD Regex Functions-pg}{40} -'xrdef {BSD Regex Functions-snt}{Section'tie7.3} -'xrdef {BSD Regular Expression Compiling-pg}{40} -'xrdef {BSD Regular Expression Compiling-snt}{Section'tie7.3.1} -'xrdef {BSD Searching-pg}{40} -'xrdef {BSD Searching-snt}{Section'tie7.3.2} -'xrdef {Copying-pg}{42} -'xrdef {Copying-snt}{Appendix'tie'char65{}} -'xrdef {Copying-pg}{42} -'xrdef {Copying-snt}{} -'xrdef {Copying-pg}{43} -'xrdef {Copying-snt}{} -'xrdef {Copying-pg}{48} -'xrdef {Copying-snt}{} -'xrdef {Index-pg}{50} -'xrdef {Index-snt}{} diff --git a/regex-0.12/doc/regex.cps b/regex-0.12/doc/regex.cps @@ -1,152 +0,0 @@ -\initial {$} -\entry {\code {$}}{18} -\initial {(} -\entry {\code 
{(}}{16} -\initial {)} -\entry {\code {)}}{16} -\initial {*} -\entry {\samp {*}}{10} -\initial {-} -\entry {\samp {-}}{13} -\initial {.} -\entry {\samp {.}}{9} -\initial {:} -\entry {\samp {:]} in regex}{14} -\initial {?} -\entry {\samp {?}}{11} -\initial {[} -\entry {\samp {[}}{13} -\entry {\samp {[:} in regex}{14} -\entry {\samp {[{\tt\hat}}}{13} -\initial {]} -\entry {\samp {]}}{13} -\initial {{\tt\char'173}} -\entry {\samp {{\tt\char'173}}}{12} -\initial {{\tt\char'174}} -\entry {\code {{\tt\char'174}}}{13} -\initial {{\tt\char'175}} -\entry {\samp {{\tt\char'175}}}{12} -\initial {{\tt\char43}} -\entry {\samp {{\tt\char43}}}{11} -\initial {{\tt\hat}} -\entry {\samp {{\tt\hat}}}{13} -\entry {\code {{\tt\hat}}}{18} -\initial {{\tt\indexbackslash }} -\entry {{\tt\indexbackslash }}{7} -\entry {\samp {{\tt\indexbackslash }}}{13} -\entry {\samp {{\tt\indexbackslash }'}}{21} -\entry {\code {{\tt\indexbackslash }(}}{16} -\entry {\code {{\tt\indexbackslash })}}{16} -\entry {\samp {{\tt\indexbackslash }`}}{21} -\entry {\samp {{\tt\indexbackslash }{\tt\char'173}}}{12} -\entry {\code {{\tt\indexbackslash }{\tt\char'174}}}{13} -\entry {\samp {{\tt\indexbackslash }{\tt\char'175}}}{12} -\entry {\samp {{\tt\indexbackslash }{\tt\gtr}}}{21} -\entry {\samp {{\tt\indexbackslash }{\tt\less}}}{21} -\entry {\samp {{\tt\indexbackslash }b}}{20} -\entry {\samp {{\tt\indexbackslash }B}}{20} -\entry {\samp {{\tt\indexbackslash }s}}{22} -\entry {\samp {{\tt\indexbackslash }S}}{22} -\entry {\samp {{\tt\indexbackslash }w}}{21} -\entry {\samp {{\tt\indexbackslash }W}}{21} -\initial {A} -\entry {\code {allocated \r {initialization}}}{26} -\entry {alternation operator}{13} -\entry {alternation operator and \samp {{\tt\hat}}}{18} -\entry {anchoring}{18} -\entry {anchors}{18} -\entry {Awk}{5} -\initial {B} -\entry {back references}{17} -\entry {backtracking}{10, 13} -\entry {beginning-of-line operator}{18} -\entry {bracket expression}{13} -\entry {\code {buffer \r {field, set by \code 
{re{\_}compile{\_}pattern}}}}{27} -\entry {\code {buffer \r {initialization}}}{26} -\initial {C} -\entry {character classes}{14} -\initial {E} -\entry {Egrep}{5} -\entry {Emacs}{5} -\entry {end-of-line operator}{18} -\entry {\code {end\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\initial {F} -\entry {\code {fastmap \r {initialization}}}{26} -\entry {\code {fastmap{\_}accurate \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {fastmaps}{30} -\initial {G} -\entry {Grep}{5} -\entry {grouping}{16} -\initial {I} -\entry {ignoring case}{35} -\entry {interval expression}{12} -\initial {M} -\entry {matching list}{13} -\entry {matching newline}{13} -\entry {matching with GNU functions}{27} -\initial {N} -\entry {\code {newline{\_}anchor \r {field in pattern buffer}}}{18} -\entry {nonmatching list}{13} -\entry {\code {not{\_}bol \r {field in pattern buffer}}}{18} -\entry {\code {num_regs\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\initial {O} -\entry {open-group operator and \samp {{\tt\hat}}}{18} -\entry {or operator}{13} -\initial {P} -\entry {parenthesizing}{16} -\entry {pattern buffer initialization}{26} -\entry {pattern buffer, definition of}{24} -\entry {POSIX Awk}{5} -\initial {R} -\entry {\code {range \r {argument to \code {re{\_}search}}}}{28} -\entry {\code {re_registers}}{32} -\entry {\code {RE{\_}BACKSLASH{\_}ESCAPE{\_}IN{\_}LIST}}{3} -\entry {\code {RE{\_}BK{\_}PLUS{\_}QM}}{3} -\entry {\code {RE{\_}CHAR{\_}CLASSES}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS \r {(and \samp {{\tt\hat}})}}}{18} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}OPS}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INVALID{\_}OPS}}{3} -\entry {\code {RE{\_}DOT{\_}NEWLINE}}{3} -\entry {\code {RE{\_}DOT{\_}NOT{\_}NULL}}{4} 
-\entry {\code {RE{\_}INTERVALS}}{4} -\entry {\code {RE{\_}LIMITED{\_}OPS}}{4} -\entry {\code {RE{\_}NEWLINE{\_}ALT}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}BRACES}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}PARENS}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}REFS}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}VBAR}}{4} -\entry {\code {RE{\_}NO{\_}EMPTY{\_}RANGES}}{4} -\entry {\code {re{\_}nsub \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {\code {re{\_}pattern{\_}buffer \r {definition}}}{24} -\entry {\code {re{\_}syntax{\_}options \r {initialization}}}{26} -\entry {\code {RE{\_}UNMATCHED{\_}RIGHT{\_}PAREN{\_}ORD}}{4} -\entry {\code {REG{\_}EXTENDED}}{35} -\entry {\code {REG{\_}ICASE}}{35} -\entry {\code {REG{\_}NEWLINE}}{36} -\entry {\code {REG{\_}NOSUB}}{35} -\entry {\code {regex.c}}{1} -\entry {\code {regex.h}}{1} -\entry {regexp anchoring}{18} -\entry {\code {regmatch{\_}t}}{39} -\entry {\code {regs{\_}allocated}}{32} -\entry {\code {REGS{\_}FIXED}}{33} -\entry {\code {REGS{\_}REALLOCATE}}{32} -\entry {\code {REGS{\_}UNALLOCATED}}{32} -\entry {regular expressions, syntax of}{2} -\initial {S} -\entry {searching with GNU functions}{28} -\entry {\code {start \r {argument to \code {re{\_}search}}}}{28} -\entry {\code {start\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\entry {\code {struct re{\_}pattern{\_}buffer \r {definition}}}{24} -\entry {subexpressions}{16} -\entry {syntax bits}{2} -\entry {\code {syntax \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {syntax initialization}{26} -\entry {syntax of regular expressions}{2} -\initial {T} -\entry {\code {translate \r {initialization}}}{26} -\initial {U} -\entry {\code {used \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\initial {W} -\entry {word boundaries, matching}{20} diff --git a/regex-0.12/doc/regex.info b/regex-0.12/doc/regex.info @@ -1,2836 +0,0 @@ -This is Info file regex.info, 
produced by Makeinfo-1.52 from the input -file .././doc/regex.texi. - - This file documents the GNU regular expression library. - - Copyright (C) 1992, 1993 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled "GNU General Public License" is included exactly as in -the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this -one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that the section entitled "GNU General Public License" -may be included in a translation approved by the Free Software -Foundation instead of in the original English. - - -File: regex.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir) - -Regular Expression Library -************************** - - This manual documents how to program with the GNU regular expression -library. This is edition 0.12a of the manual, 19 September 1992. - - The first part of this master menu lists the major nodes in this Info -document, including the index. The rest of the menu lists all the -lower level nodes in the document. - -* Menu: - -* Overview:: -* Regular Expression Syntax:: -* Common Operators:: -* GNU Operators:: -* GNU Emacs Operators:: -* What Gets Matched?:: -* Programming with Regex:: -* Copying:: Copying and sharing Regex. -* Index:: General index. - -- The Detailed Node Listing -- - -Regular Expression Syntax - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - -Common Operators - -* Match-self Operator:: Ordinary characters. 
-* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? {} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - -Repetition Operators - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: {} - -List Operators (`[' ... `]' and `[^' ... `]') - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - -Anchoring Operators - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - -GNU Operators - -* Word Operators:: -* Buffer Operators:: - -Word Operators - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - -Buffer Operators - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - -GNU Emacs Operators - -* Syntactic Class Operators:: - -Syntactic Class Operators - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - -Programming with Regex - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - -GNU Regex Functions - -* GNU Pattern Buffers:: The re_pattern_buffer type. -* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - -POSIX Regex Functions - -* POSIX Pattern Buffers:: The regex_t type. 
-* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () - -BSD Regex Functions - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () - - -File: regex.info, Node: Overview, Next: Regular Expression Syntax, Prev: Top, Up: Top - -Overview -******** - - A "regular expression" (or "regexp", or "pattern") is a text string -that describes some (mathematical) set of strings. A regexp R -"matches" a string S if S is in the set of strings described by R. - - Using the Regex library, you can: - - * see if a string matches a specified pattern as a whole, and - - * search within a string for a substring matching a specified - pattern. - - Some regular expressions match only one string, i.e., the set they -describe has only one member. For example, the regular expression -`foo' matches the string `foo' and no others. Other regular -expressions match more than one string, i.e., the set they describe has -more than one member. For example, the regular expression `f*' matches -the set of strings made up of any number (including zero) of `f's. As -you can see, some characters in regular expressions match themselves -(such as `f') and some don't (such as `*'); the ones that don't match -themselves instead let you specify patterns that describe many -different strings. - - To either match or search for a regular expression with the Regex -library functions, you must first compile it with a Regex pattern -compiling function. A "compiled pattern" is a regular expression -converted to the internal format used by the library functions. Once -you've compiled a pattern, you can use it for matching or searching any -number of times. - - The Regex library consists of two source files: `regex.h' and -`regex.c'. Regex provides three groups of functions with which you can -operate on regular expressions. 
One group--the GNU group--is more -powerful but not completely compatible with the other two, namely the -POSIX and Berkeley UNIX groups; its interface was designed specifically -for GNU. The other groups have the same interfaces as do the regular -expression functions in POSIX and Berkeley UNIX. - - We wrote this chapter with programmers in mind, not users of -programs--such as Emacs--that use Regex. We describe the Regex library -in its entirety, not how to write regular expressions that a particular -program understands. - - -File: regex.info, Node: Regular Expression Syntax, Next: Common Operators, Prev: Overview, Up: Top - -Regular Expression Syntax -************************* - - "Characters" are things you can type. "Operators" are things in a -regular expression that match one or more characters. You compose -regular expressions from operators, which in turn you specify using one -or more characters. - - Most characters represent what we call the match-self operator, i.e., -they match themselves; we call these characters "ordinary". Other -characters represent either all or parts of fancier operators; e.g., -`.' represents what we call the match-any-character operator (which, no -surprise, matches (almost) any character); we call these characters -"special". Two different things determine what characters represent -what operators: - - 1. the regular expression syntax your program has told the Regex - library to recognize, and - - 2. the context of the character in the regular expression. - - In the following sections, we describe these things in more detail. - -* Menu: - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - - -File: regex.info, Node: Syntax Bits, Next: Predefined Syntaxes, Up: Regular Expression Syntax - -Syntax Bits -=========== - - In any particular syntax for regular expressions, some characters are -always special, others are sometimes special, and others are never -special. 
The particular syntax that Regex recognizes for a given -regular expression depends on the value in the `syntax' field of the -pattern buffer of that regular expression. - - You get a pattern buffer by compiling a regular expression. *Note -GNU Pattern Buffers::, and *Note POSIX Pattern Buffers::, for more -information on pattern buffers. *Note GNU Regular Expression -Compiling::, *Note POSIX Regular Expression Compiling::, and *Note BSD -Regular Expression Compiling::, for more information on compiling. - - Regex considers the value of the `syntax' field to be a collection of -bits; we refer to these bits as "syntax bits". In most cases, they -affect what characters represent what operators. We describe the -meanings of the operators to which we refer in *Note Common Operators::, -*Note GNU Operators::, and *Note GNU Emacs Operators::. - - For reference, here is the complete list of syntax bits, in -alphabetical order: - -`RE_BACKSLASH_ESCAPE_IN_LISTS' - If this bit is set, then `\' inside a list (*note List Operators::.) - quotes (makes ordinary, if it's special) the following character; - if this bit isn't set, then `\' is an ordinary character inside - lists. (*Note The Backslash Character::, for what `\' does - outside of lists.) - -`RE_BK_PLUS_QM' - If this bit is set, then `\+' represents the match-one-or-more - operator and `\?' represents the match-zero-or-one operator; if - this bit isn't set, then `+' represents the match-one-or-more - operator and `?' represents the match-zero-or-one operator. This - bit is irrelevant if `RE_LIMITED_OPS' is set. - -`RE_CHAR_CLASSES' - If this bit is set, then you can use character classes in lists; - if this bit isn't set, then you can't. - -`RE_CONTEXT_INDEP_ANCHORS' - If this bit is set, then `^' and `$' are special anywhere outside - a list; if this bit isn't set, then these characters are special - only in certain contexts. *Note Match-beginning-of-line - Operator::, and *Note Match-end-of-line Operator::. 
- -`RE_CONTEXT_INDEP_OPS' - If this bit is set, then certain characters are special anywhere - outside a list; if this bit isn't set, then those characters are - special only in some contexts and are ordinary elsewhere. - Specifically, if this bit isn't set then `*', and (if the syntax - bit `RE_LIMITED_OPS' isn't set) `+' and `?' (or `\+' and `\?', - depending on the syntax bit `RE_BK_PLUS_QM') represent repetition - operators only if they're not first in a regular expression or - just after an open-group or alternation operator. The same holds - for `{' (or `\{', depending on the syntax bit `RE_NO_BK_BRACES') if - it is the beginning of a valid interval and the syntax bit - `RE_INTERVALS' is set. - -`RE_CONTEXT_INVALID_OPS' - If this bit is set, then repetition and alternation operators - can't be in certain positions within a regular expression. - Specifically, the regular expression is invalid if it has: - - * a repetition operator first in the regular expression or just - after a match-beginning-of-line, open-group, or alternation - operator; or - - * an alternation operator first or last in the regular - expression, just before a match-end-of-line operator, or just - after an alternation or open-group operator. - - If this bit isn't set, then you can put the characters - representing the repetition and alternation characters anywhere in - a regular expression. Whether or not they will in fact be - operators in certain positions depends on other syntax bits. - -`RE_DOT_NEWLINE' - If this bit is set, then the match-any-character operator matches - a newline; if this bit isn't set, then it doesn't. - -`RE_DOT_NOT_NULL' - If this bit is set, then the match-any-character operator doesn't - match a null character; if this bit isn't set, then it does. - -`RE_INTERVALS' - If this bit is set, then Regex recognizes interval operators; if - this bit isn't set, then it doesn't. 
- -`RE_LIMITED_OPS' - If this bit is set, then Regex doesn't recognize the - match-one-or-more, match-zero-or-one or alternation operators; if - this bit isn't set, then it does. - -`RE_NEWLINE_ALT' - If this bit is set, then newline represents the alternation - operator; if this bit isn't set, then newline is ordinary. - -`RE_NO_BK_BRACES' - If this bit is set, then `{' represents the open-interval operator - and `}' represents the close-interval operator; if this bit isn't - set, then `\{' represents the open-interval operator and `\}' - represents the close-interval operator. This bit is relevant only - if `RE_INTERVALS' is set. - -`RE_NO_BK_PARENS' - If this bit is set, then `(' represents the open-group operator and - `)' represents the close-group operator; if this bit isn't set, - then `\(' represents the open-group operator and `\)' represents - the close-group operator. - -`RE_NO_BK_REFS' - If this bit is set, then Regex doesn't recognize `\'DIGIT as the - back reference operator; if this bit isn't set, then it does. - -`RE_NO_BK_VBAR' - If this bit is set, then `|' represents the alternation operator; - if this bit isn't set, then `\|' represents the alternation - operator. This bit is irrelevant if `RE_LIMITED_OPS' is set. - -`RE_NO_EMPTY_RANGES' - If this bit is set, then a regular expression with a range whose - ending point collates lower than its starting point is invalid; if - this bit isn't set, then Regex considers such a range to be empty. - -`RE_UNMATCHED_RIGHT_PAREN_ORD' - If this bit is set and the regular expression has no matching - open-group operator, then Regex considers what would otherwise be - a close-group operator (based on how `RE_NO_BK_PARENS' is set) to - match `)'. - - -File: regex.info, Node: Predefined Syntaxes, Next: Collating Elements vs. 
Characters, Prev: Syntax Bits, Up: Regular Expression Syntax - -Predefined Syntaxes -=================== - - If you're programming with Regex, you can set a pattern buffer's -(*note GNU Pattern Buffers::., and *Note POSIX Pattern Buffers::) -`syntax' field either to an arbitrary combination of syntax bits (*note -Syntax Bits::.) or else to the configurations defined by Regex. These -configurations define the syntaxes used by certain programs--GNU Emacs, -POSIX Awk, traditional Awk, Grep, Egrep--in addition to syntaxes for -POSIX basic and extended regular expressions. - - The predefined syntaxes--taken directly from `regex.h'--are: - - #define RE_SYNTAX_EMACS 0 - - #define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - - #define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - - #define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - - #define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - - #define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - - /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ - #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - - #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - - /* Syntax bits common to both basic and extended POSIX regex syntax. */ - #define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - - #define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - - /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. 
Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ - #define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - - #define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - - /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ - #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) - - -File: regex.info, Node: Collating Elements vs. Characters, Next: The Backslash Character, Prev: Predefined Syntaxes, Up: Regular Expression Syntax - -Collating Elements vs. Characters -================================= - - POSIX generalizes the notion of a character to that of a collating -element. It defines a "collating element" to be "a sequence of one or -more bytes defined in the current collating sequence as a unit of -collation." - - This generalizes the notion of a character in two ways. First, a -single character can map into two or more collating elements. For -example, the German "es-zet" collates as the collating element `s' -followed by another collating element `s'. Second, two or more -characters can map into one collating element. For example, the -Spanish `ll' collates after `l' and before `m'. - - Since POSIX's "collating element" preserves the essential idea of a -"character," we use the latter, more familiar, term in this document. - - -File: regex.info, Node: The Backslash Character, Prev: Collating Elements vs. 
Characters, Up: Regular Expression Syntax - -The Backslash Character -======================= - - The `\' character has one of four different meanings, depending on -the context in which you use it and what syntax bits are set (*note -Syntax Bits::.). It can: 1) stand for itself, 2) quote the next -character, 3) introduce an operator, or 4) do nothing. - - 1. It stands for itself inside a list (*note List Operators::.) if - the syntax bit `RE_BACKSLASH_ESCAPE_IN_LISTS' is not set. For - example, `[\]' would match `\'. - - 2. It quotes (makes ordinary, if it's special) the next character - when you use it either: - - * outside a list,(1) or - - * inside a list and the syntax bit - `RE_BACKSLASH_ESCAPE_IN_LISTS' is set. - - 3. It introduces an operator when followed by certain ordinary - characters--sometimes only when certain syntax bits are set. See - the cases `RE_BK_PLUS_QM', `RE_NO_BK_BRACES', `RE_NO_BK_VBAR', - `RE_NO_BK_PARENS', `RE_NO_BK_REFS' in *Note Syntax Bits::. Also: - - * `\b' represents the match-word-boundary operator (*note - Match-word-boundary Operator::.). - - * `\B' represents the match-within-word operator (*note - Match-within-word Operator::.). - - * `\<' represents the match-beginning-of-word operator - (*note Match-beginning-of-word Operator::.). - - * `\>' represents the match-end-of-word operator (*note - Match-end-of-word Operator::.). - - * `\w' represents the match-word-constituent operator (*note - Match-word-constituent Operator::.). - - * `\W' represents the match-non-word-constituent operator - (*note Match-non-word-constituent Operator::.). - - * `\`' represents the match-beginning-of-buffer operator and - `\'' represents the match-end-of-buffer operator (*note - Buffer Operators::.). - - * If Regex was compiled with the C preprocessor symbol `emacs' - defined, then `\sCLASS' represents the match-syntactic-class - operator and `\SCLASS' represents the - match-not-syntactic-class operator (*note Syntactic Class - Operators::.). 
- - 4. In all other cases, Regex ignores `\'. For example, `\n' matches - `n'. - - - ---------- Footnotes ---------- - - (1) Sometimes you don't have to explicitly quote special characters -to make them ordinary. For instance, most characters lose any special -meaning inside a list (*note List Operators::.). In addition, if the -syntax bits `RE_CONTEXT_INVALID_OPS' and `RE_CONTEXT_INDEP_OPS' aren't -set, then (for historical reasons) the matcher considers special -characters ordinary if they are in contexts where the operations they -represent make no sense; for example, then the match-zero-or-more -operator (represented by `*') matches itself in the regular expression -`*foo' because there is no preceding expression on which it can -operate. It is poor practice, however, to depend on this behavior; if -you want a special character to be ordinary outside a list, it's better -to always quote it, regardless. - - -File: regex.info, Node: Common Operators, Next: GNU Operators, Prev: Regular Expression Syntax, Up: Top - -Common Operators -**************** - - You compose regular expressions from operators. In the following -sections, we describe the regular expression operators specified by -POSIX; GNU also uses these. Most operators have more than one -representation as characters. *Note Regular Expression Syntax::, for -what characters represent what operators under what circumstances. - - For most operators that can be represented in two ways, one -representation is a single character and the other is that character -preceded by `\'. For example, either `(' or `\(' represents the -open-group operator. Which one does depends on the setting of a syntax -bit, in this case `RE_NO_BK_PARENS'. Why is this so? Historical -reasons dictate some of the varying representations, while POSIX -dictates others. - - Finally, almost all characters lose any special meaning inside a list -(*note List Operators::.). - -* Menu: - -* Match-self Operator:: Ordinary characters. 
-* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? {} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - - -File: regex.info, Node: Match-self Operator, Next: Match-any-character Operator, Up: Common Operators - -The Match-self Operator (ORDINARY CHARACTER) -============================================ - - This operator matches the character itself. All ordinary characters -(*note Regular Expression Syntax::.) represent this operator. For -example, `f' is always an ordinary character, so the regular expression -`f' matches only the string `f'. In particular, it does *not* match -the string `ff'. - - -File: regex.info, Node: Match-any-character Operator, Next: Concatenation Operator, Prev: Match-self Operator, Up: Common Operators - -The Match-any-character Operator (`.') -====================================== - - This operator matches any single printing or nonprinting character -except it won't match a: - -newline - if the syntax bit `RE_DOT_NEWLINE' isn't set. - -null - if the syntax bit `RE_DOT_NOT_NULL' is set. - - The `.' (period) character represents this operator. For example, -`a.b' matches any three-character string beginning with `a' and ending -with `b'. - - -File: regex.info, Node: Concatenation Operator, Next: Repetition Operators, Prev: Match-any-character Operator, Up: Common Operators - -The Concatenation Operator -========================== - - This operator concatenates two regular expressions A and B. No -character represents this operator; you simply put B after A. The -result is a regular expression that will match a string if A matches -its first part and B matches the rest. For example, `xy' (two -match-self operators) matches `xy'. 
- - -File: regex.info, Node: Repetition Operators, Next: Alternation Operator, Prev: Concatenation Operator, Up: Common Operators - -Repetition Operators -==================== - - Repetition operators repeat the preceding regular expression a -specified number of times. - -* Menu: - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: {} - - -File: regex.info, Node: Match-zero-or-more Operator, Next: Match-one-or-more Operator, Up: Repetition Operators - -The Match-zero-or-more Operator (`*') -------------------------------------- - - This operator repeats the smallest possible preceding regular -expression as many times as necessary (including zero) to match the -pattern. `*' represents this operator. For example, `o*' matches any -string made up of zero or more `o's. Since this operator operates on -the smallest preceding regular expression, `fo*' has a repeating `o', -not a repeating `fo'. So, `fo*' matches `f', `fo', `foo', and so on. - - Since the match-zero-or-more operator is a suffix operator, it may be -useless as such when no regular expression precedes it. This is the -case when it: - - * is first in a regular expression, or - - * follows a match-beginning-of-line, open-group, or alternation - operator. - -Three different things can happen in these cases: - - 1. If the syntax bit `RE_CONTEXT_INVALID_OPS' is set, then the - regular expression is invalid. - - 2. If `RE_CONTEXT_INVALID_OPS' isn't set, but `RE_CONTEXT_INDEP_OPS' - is, then `*' represents the match-zero-or-more operator (which - then operates on the empty string). - - 3. Otherwise, `*' is ordinary. - - - The matcher processes a match-zero-or-more operator by first matching -as many repetitions of the smallest preceding regular expression as it -can. Then it continues to match the rest of the pattern. 
- - If it can't match the rest of the pattern, it backtracks (as many -times as necessary), each time discarding one of the matches until it -can either match the entire pattern or be certain that it cannot get a -match. For example, when matching `ca*ar' against `caaar', the matcher -first matches all three `a's of the string with the `a*' of the regular -expression. However, it cannot then match the final `ar' of the -regular expression against the final `r' of the string. So it -backtracks, discarding the match of the last `a' in the string. It can -then match the remaining `ar'. - - -File: regex.info, Node: Match-one-or-more Operator, Next: Match-zero-or-one Operator, Prev: Match-zero-or-more Operator, Up: Repetition Operators - -The Match-one-or-more Operator (`+' or `\+') --------------------------------------------- - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM' -isn't set, then `+' represents this operator; if it is, then `\+' does. - - This operator is similar to the match-zero-or-more operator except -that it repeats the preceding regular expression at least once; *note -Match-zero-or-more Operator::., for what it operates on, how some -syntax bits affect it, and how Regex backtracks to match it. - - For example, supposing that `+' represents the match-one-or-more -operator; then `ca+r' matches, e.g., `car' and `caaaar', but not `cr'. - - -File: regex.info, Node: Match-zero-or-one Operator, Next: Interval Operators, Prev: Match-one-or-more Operator, Up: Repetition Operators - -The Match-zero-or-one Operator (`?' or `\?') --------------------------------------------- - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM' -isn't set, then `?' represents this operator; if it is, then `\?' does. 
- - This operator is similar to the match-zero-or-more operator except -that it repeats the preceding regular expression once or not at all; -*note Match-zero-or-more Operator::., to see what it operates on, how -some syntax bits affect it, and how Regex backtracks to match it. - - For example, supposing that `?' represents the match-zero-or-one -operator; then `ca?r' matches both `car' and `cr', but nothing else. - - -File: regex.info, Node: Interval Operators, Prev: Match-zero-or-one Operator, Up: Repetition Operators - -Interval Operators (`{' ... `}' or `\{' ... `\}') -------------------------------------------------- - - If the syntax bit `RE_INTERVALS' is set, then Regex recognizes -"interval expressions". They repeat the smallest possible preceding -regular expression a specified number of times. - - If the syntax bit `RE_NO_BK_BRACES' is set, `{' represents the -"open-interval operator" and `}' represents the "close-interval -operator" ; otherwise, `\{' and `\}' do. - - Specifically, supposing that `{' and `}' represent the open-interval -and close-interval operators; then: - -`{COUNT}' - matches exactly COUNT occurrences of the preceding regular - expression. - -`{MIN,}' - matches MIN or more occurrences of the preceding regular - expression. - -`{MIN, MAX}' - matches at least MIN but no more than MAX occurrences of the - preceding regular expression. - - The interval expression (but not necessarily the regular expression -that contains it) is invalid if: - - * MIN is greater than MAX, or - - * any of COUNT, MIN, or MAX are outside the range zero to - `RE_DUP_MAX' (which symbol `regex.h' defines). - - If the interval expression is invalid and the syntax bit -`RE_NO_BK_BRACES' is set, then Regex considers all the characters in -the would-be interval to be ordinary. If that bit isn't set, then the -regular expression is invalid. 
- - If the interval expression is valid but there is no preceding regular -expression on which to operate, then if the syntax bit -`RE_CONTEXT_INVALID_OPS' is set, the regular expression is invalid. If -that bit isn't set, then Regex considers all the characters--other than -backslashes, which it ignores--in the would-be interval to be ordinary. - - -File: regex.info, Node: Alternation Operator, Next: List Operators, Prev: Repetition Operators, Up: Common Operators - -The Alternation Operator (`|' or `\|') -====================================== - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_NO_BK_VBAR' -is set, then `|' represents this operator; otherwise, `\|' does. - - Alternatives match one of a choice of regular expressions: if you put -the character(s) representing the alternation operator between any two -regular expressions A and B, the result matches the union of the -strings that A and B match. For example, supposing that `|' is the -alternation operator, then `foo|bar|quux' would match any of `foo', -`bar' or `quux'. - - The alternation operator operates on the *largest* possible -surrounding regular expressions. (Put another way, it has the lowest -precedence of any regular expression operator.) Thus, the only way you -can delimit its arguments is to use grouping. For example, if `(' and -`)' are the open and close-group operators, then `fo(o|b)ar' would -match either `fooar' or `fobar'. (`foo|bar' would match `foo' or -`bar'.) - - The matcher usually tries all combinations of alternatives so as to -match the longest possible string. For example, when matching -`(fooq|foo)*(qbarquux|bar)' against `fooqbarquux', it cannot take, say, -the first ("depth-first") combination it could match, since then it -would be content to match just `fooqbar'. 
- - -File: regex.info, Node: List Operators, Next: Grouping Operators, Prev: Alternation Operator, Up: Common Operators - -List Operators (`[' ... `]' and `[^' ... `]') -============================================= - - "Lists", also called "bracket expressions", are a set of one or more -items. An "item" is a character, a character class expression, or a -range expression. The syntax bits affect which kinds of items you can -put in a list. We explain the last two items in subsections below. -Empty lists are invalid. - - A "matching list" matches a single character represented by one of -the list items. You form a matching list by enclosing one or more items -within an "open-matching-list operator" (represented by `[') and a -"close-list operator" (represented by `]'). - - For example, `[ab]' matches either `a' or `b'. `[ad]*' matches the -empty string and any string composed of just `a's and `d's in any -order. Regex considers invalid a regular expression with a `[' but no -matching `]'. - - "Nonmatching lists" are similar to matching lists except that they -match a single character *not* represented by one of the list items. -You use an "open-nonmatching-list operator" (represented by `[^'(1)) -instead of an open-matching-list operator to start a nonmatching list. - - For example, `[^ab]' matches any character except `a' or `b'. - - If the `posix_newline' field in the pattern buffer (*note GNU Pattern -Buffers::.) is set, then nonmatching lists do not match a newline. - - Most characters lose any special meaning inside a list. The special -characters inside a list follow. - -`]' - ends the list if it's not the first list item. So, if you want to - make the `]' character a list item, you must put it first. - -`\' - quotes the next character if the syntax bit - `RE_BACKSLASH_ESCAPE_IN_LISTS' is set. - -`[:' - represents the open-character-class operator (*note Character - Class Operators::.) 
if the syntax bit `RE_CHAR_CLASSES' is set and - what follows is a valid character class expression. - -`:]' - represents the close-character-class operator if the syntax bit - `RE_CHAR_CLASSES' is set and what precedes it is an - open-character-class operator followed by a valid character class - name. - -`-' - represents the range operator (*note Range Operator::.) if it's - not first or last in a list or the ending point of a range. - -All other characters are ordinary. For example, `[.*]' matches `.' and -`*'. - -* Menu: - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - - ---------- Footnotes ---------- - - (1) Regex therefore doesn't consider the `^' to be the first -character in the list. If you put a `^' character first in (what you -think is) a matching list, you'll turn it into a nonmatching list. - - -File: regex.info, Node: Character Class Operators, Next: Range Operator, Up: List Operators - -Character Class Operators (`[:' ... `:]') ------------------------------------------ - - If the syntax bit `RE_CHAR_CLASSES' is set, then Regex -recognizes character class expressions inside lists. A "character -class expression" matches one character from a given class. You form a -character class expression by putting a character class name between an -"open-character-class operator" (represented by `[:') and a -"close-character-class operator" (represented by `:]'). 
The character -class names and their meanings are: - -`alnum' - letters and digits - -`alpha' - letters - -`blank' - system-dependent; for GNU, a space or tab - -`cntrl' - control characters (in the ASCII encoding, code 0177 and codes - less than 040) - -`digit' - digits - -`graph' - same as `print' except omits space - -`lower' - lowercase letters - -`print' - printable characters (in the ASCII encoding, space tilde--codes - 040 through 0176) - -`punct' - neither control nor alphanumeric characters - -`space' - space, carriage return, newline, vertical tab, and form feed - -`upper' - uppercase letters - -`xdigit' - hexadecimal digits: `0'-`9', `a'-`f', `A'-`F' - -These correspond to the definitions in the C library's `<ctype.h>' -facility. For example, `[:alpha:]' corresponds to the standard -facility `isalpha'. Regex recognizes character class expressions only -inside of lists; so `[[:alpha:]]' matches any letter, but `[:alpha:]' -outside of a bracket expression and not followed by a repetition -operator matches just itself. - - -File: regex.info, Node: Range Operator, Prev: Character Class Operators, Up: List Operators - -The Range Operator (`-') ------------------------- - - Regex recognizes "range expressions" inside a list. They represent -those characters that fall between two elements in the current -collating sequence. You form a range expression by putting a "range -operator" between two characters.(1) `-' represents the range operator. -For example, `a-f' within a list represents all the characters from `a' -through `f' inclusively. - - If the syntax bit `RE_NO_EMPTY_RANGES' is set, then if the range's -ending point collates less than its starting point, the range (and the -regular expression containing it) is invalid. For example, the regular -expression `[z-a]' would be invalid. If this bit isn't set, then Regex -considers such a range to be empty. 
- - Since `-' represents the range operator, if you want to make a `-' -character itself a list item, you must do one of the following: - - * Put the `-' either first or last in the list. - - * Include a range whose starting point collates strictly lower than - `-' and whose ending point collates equal or higher. Unless a - range is the first item in a list, a `-' can't be its starting - point, but *can* be its ending point. That is because Regex - considers `-' to be the range operator unless it is preceded by - another `-'. For example, in the ASCII encoding, `)', `*', `+', - `,', `-', `.', and `/' are contiguous characters in the collating - sequence. You might think that `[)-+--/]' has two ranges: `)-+' - and `--/'. Rather, it has the ranges `)-+' and `+--', plus the - character `/', so it matches, e.g., `,', not `.'. - - * Put a range whose starting point is `-' first in the list. - - For example, `[-a-z]' matches a lowercase letter or a hyphen (in -English, in ASCII). - - ---------- Footnotes ---------- - - (1) You can't use a character class for the starting or ending point -of a range, since a character class is not a single character. - - -File: regex.info, Node: Grouping Operators, Next: Back-reference Operator, Prev: List Operators, Up: Common Operators - -Grouping Operators (`(' ... `)' or `\(' ... `\)') -================================================= - - A "group", also known as a "subexpression", consists of an -"open-group operator", any number of other operators, and a -"close-group operator". Regex treats this sequence as a unit, just as -mathematics and programming languages treat a parenthesized expression -as a unit. - - Therefore, using "groups", you can: - - * delimit the argument(s) to an alternation operator (*note - Alternation Operator::.) or a repetition operator (*note - Repetition Operators::.). - - * keep track of the indices of the substring that matched a given - group. *Note Using Registers::, for a precise explanation. 
This - lets you: - - * use the back-reference operator (*note Back-reference - Operator::.). - - * use registers (*note Using Registers::.). - - If the syntax bit `RE_NO_BK_PARENS' is set, then `(' represents the -open-group operator and `)' represents the close-group operator; -otherwise, `\(' and `\)' do. - - If the syntax bit `RE_UNMATCHED_RIGHT_PAREN_ORD' is set and a -close-group operator has no matching open-group operator, then Regex -considers it to match `)'. - - -File: regex.info, Node: Back-reference Operator, Next: Anchoring Operators, Prev: Grouping Operators, Up: Common Operators - -The Back-reference Operator ("\"DIGIT) -====================================== - - If the syntax bit `RE_NO_BK_REF' isn't set, then Regex recognizes -back references. A back reference matches a specified preceding group. -The back reference operator is represented by `\DIGIT' anywhere after -the end of a regular expression's DIGIT-th group (*note Grouping -Operators::.). - - DIGIT must be between `1' and `9'. The matcher assigns numbers 1 -through 9 to the first nine groups it encounters. By using one of `\1' -through `\9' after the corresponding group's close-group operator, you -can match a substring identical to the one that the group does. - - Back references match according to the following (in all examples -below, `(' represents the open-group, `)' the close-group, `{' the -open-interval and `}' the close-interval operator): - - * If the group matches a substring, the back reference matches an - identical substring. For example, `(a)\1' matches `aa' and - `(bana)na\1bo\1' matches `bananabanabobana'. Likewise, `(.*)\1' - matches any (newline-free if the syntax bit `RE_DOT_NEWLINE' isn't - set) string that is composed of two identical halves; the `(.*)' - matches the first half and the `\1' matches the second half. 
- - * If the group matches more than once (as it might if followed by, - e.g., a repetition operator), then the back reference matches the - substring the group *last* matched. For example, `((a*)b)*\1\2' - matches `aabababa'; first group 1 (the outer one) matches `aab' - and group 2 (the inner one) matches `aa'. Then group 1 matches - `ab' and group 2 matches `a'. So, `\1' matches `ab' and `\2' - matches `a'. - - * If the group doesn't participate in a match, i.e., it is part of an - alternative not taken or a repetition operator allows zero - repetitions of it, then the back reference makes the whole match - fail. For example, `(one()|two())-and-(three\2|four\3)' matches - `one-and-three' and `two-and-four', but not `one-and-four' or - `two-and-three'. For example, if the pattern matches `one-and-', - then its group 2 matches the empty string and its group 3 doesn't - participate in the match. So, if it then matches `four', then - when it tries to back reference group 3--which it will attempt to - do because `\3' follows the `four'--the match will fail because - group 3 didn't participate in the match. - - You can use a back reference as an argument to a repetition operator. -For example, `(a(b))\2*' matches `a' followed by one or more `b's. -Similarly, `(a(b))\2{3}' matches `abbbb'. - - If there is no preceding DIGIT-th subexpression, the regular -expression is invalid. - - -File: regex.info, Node: Anchoring Operators, Prev: Back-reference Operator, Up: Common Operators - -Anchoring Operators -=================== - - These operators can constrain a pattern to match only at the -beginning or end of the entire string or at the beginning or end of a -line. 
- -* Menu: - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - - -File: regex.info, Node: Match-beginning-of-line Operator, Next: Match-end-of-line Operator, Up: Anchoring Operators - -The Match-beginning-of-line Operator (`^') ------------------------------------------- - - This operator can match the empty string either at the beginning of -the string or after a newline character. Thus, it is said to "anchor" -the pattern to the beginning of a line. - - In the cases following, `^' represents this operator. (Otherwise, -`^' is ordinary.) - - * It (the `^') is first in the pattern, as in `^foo'. - - * The syntax bit `RE_CONTEXT_INDEP_ANCHORS' is set, and it is outside - a bracket expression. - - * It follows an open-group or alternation operator, as in `a\(^b\)' - and `a\|^b'. *Note Grouping Operators::, and *Note Alternation - Operator::. - - These rules imply that some valid patterns containing `^' cannot be -matched; for example, `foo^bar' if `RE_CONTEXT_INDEP_ANCHORS' is set. - - If the `not_bol' field is set in the pattern buffer (*note GNU -Pattern Buffers::.), then `^' fails to match at the beginning of the -string. *Note POSIX Matching::, for when you might find this useful. - - If the `newline_anchor' field isn't set in the pattern buffer, then `^' -fails to match after a newline. This is useful when you do not regard -the string to be matched as broken into lines. - - -File: regex.info, Node: Match-end-of-line Operator, Prev: Match-beginning-of-line Operator, Up: Anchoring Operators - -The Match-end-of-line Operator (`$') ------------------------------------- - - This operator can match the empty string either at the end of the -string or before a newline character in the string. Thus, it is said -to "anchor" the pattern to the end of a line. - - It is always represented by `$'. For example, `foo$' usually -matches, e.g., `foo' and, e.g., the first three characters of -`foo\nbar'. 
- - Its interaction with the syntax bits and pattern buffer fields is -exactly the dual of `^''s; see the previous section. (That is, -"beginning" becomes "end", "next" becomes "previous", and "after" -becomes "before".) - - -File: regex.info, Node: GNU Operators, Next: GNU Emacs Operators, Prev: Common Operators, Up: Top - -GNU Operators -************* - - Following are operators that GNU defines (and POSIX doesn't). - -* Menu: - -* Word Operators:: -* Buffer Operators:: - - -File: regex.info, Node: Word Operators, Next: Buffer Operators, Up: GNU Operators - -Word Operators -============== - - The operators in this section require Regex to recognize parts of -words. Regex uses a syntax table to determine whether or not a -character is part of a word, i.e., whether or not it is -"word-constituent". - -* Menu: - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - - -File: regex.info, Node: Non-Emacs Syntax Tables, Next: Match-word-boundary Operator, Up: Word Operators - -Non-Emacs Syntax Tables ------------------------ - - A "syntax table" is an array indexed by the characters in your -character set. In the ASCII encoding, therefore, a syntax table has -256 elements. Regex always uses a `char *' variable `re_syntax_table' -as its syntax table. In some cases, it initializes this variable and -in others it expects you to initialize it. - - * If Regex is compiled with the preprocessor symbols `emacs' and - `SYNTAX_TABLE' both undefined, then Regex allocates - `re_syntax_table' and initializes an element I either to `Sword' - (which it defines) if I is a letter, number, or `_', or to zero if - it's not. 
- - * If Regex is compiled with `emacs' undefined but `SYNTAX_TABLE' - defined, then Regex expects you to define a `char *' variable - `re_syntax_table' to be a valid syntax table. - - * *Note Emacs Syntax Tables::, for what happens when Regex is - compiled with the preprocessor symbol `emacs' defined. - - -File: regex.info, Node: Match-word-boundary Operator, Next: Match-within-word Operator, Prev: Non-Emacs Syntax Tables, Up: Word Operators - -The Match-word-boundary Operator (`\b') ---------------------------------------- - - This operator (represented by `\b') matches the empty string at -either the beginning or the end of a word. For example, `\brat\b' -matches the separate word `rat'. - - -File: regex.info, Node: Match-within-word Operator, Next: Match-beginning-of-word Operator, Prev: Match-word-boundary Operator, Up: Word Operators - -The Match-within-word Operator (`\B') -------------------------------------- - - This operator (represented by `\B') matches the empty string within a -word. For example, `c\Brat\Be' matches `crate', but `dirty \Brat' -doesn't match `dirty rat'. - - -File: regex.info, Node: Match-beginning-of-word Operator, Next: Match-end-of-word Operator, Prev: Match-within-word Operator, Up: Word Operators - -The Match-beginning-of-word Operator (`\<') -------------------------------------------- - - This operator (represented by `\<') matches the empty string at the -beginning of a word. - - -File: regex.info, Node: Match-end-of-word Operator, Next: Match-word-constituent Operator, Prev: Match-beginning-of-word Operator, Up: Word Operators - -The Match-end-of-word Operator (`\>') -------------------------------------- - - This operator (represented by `\>') matches the empty string at the -end of a word. 
- - -File: regex.info, Node: Match-word-constituent Operator, Next: Match-non-word-constituent Operator, Prev: Match-end-of-word Operator, Up: Word Operators - -The Match-word-constituent Operator (`\w') ------------------------------------------- - - This operator (represented by `\w') matches any word-constituent -character. - - -File: regex.info, Node: Match-non-word-constituent Operator, Prev: Match-word-constituent Operator, Up: Word Operators - -The Match-non-word-constituent Operator (`\W') ----------------------------------------------- - - This operator (represented by `\W') matches any character that is not -word-constituent. - - -File: regex.info, Node: Buffer Operators, Prev: Word Operators, Up: GNU Operators - -Buffer Operators -================ - - Following are operators which work on buffers. In Emacs, a "buffer" -is, naturally, an Emacs buffer. For other programs, Regex considers the -entire string to be matched as the buffer. - -* Menu: - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - - -File: regex.info, Node: Match-beginning-of-buffer Operator, Next: Match-end-of-buffer Operator, Up: Buffer Operators - -The Match-beginning-of-buffer Operator (`\`') ---------------------------------------------- - - This operator (represented by `\`') matches the empty string at the -beginning of the buffer. - - -File: regex.info, Node: Match-end-of-buffer Operator, Prev: Match-beginning-of-buffer Operator, Up: Buffer Operators - -The Match-end-of-buffer Operator (`\'') ---------------------------------------- - - This operator (represented by `\'') matches the empty string at the -end of the buffer. - - -File: regex.info, Node: GNU Emacs Operators, Next: What Gets Matched?, Prev: GNU Operators, Up: Top - -GNU Emacs Operators -******************* - - Following are operators that GNU defines (and POSIX doesn't) that you -can use only when Regex is compiled with the preprocessor symbol -`emacs' defined. 
- -* Menu: - -* Syntactic Class Operators:: - - -File: regex.info, Node: Syntactic Class Operators, Up: GNU Emacs Operators - -Syntactic Class Operators -========================= - - The operators in this section require Regex to recognize the syntactic -classes of characters. Regex uses a syntax table to determine this. - -* Menu: - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - - -File: regex.info, Node: Emacs Syntax Tables, Next: Match-syntactic-class Operator, Up: Syntactic Class Operators - -Emacs Syntax Tables -------------------- - - A "syntax table" is an array indexed by the characters in your -character set. In the ASCII encoding, therefore, a syntax table has -256 elements. - - If Regex is compiled with the preprocessor symbol `emacs' defined, -then Regex expects you to define and initialize the variable -`re_syntax_table' to be an Emacs syntax table. Emacs' syntax tables -are more complicated than Regex's own (*note Non-Emacs Syntax -Tables::.). *Note Syntax: (emacs)Syntax, for a description of Emacs' -syntax tables. - - -File: regex.info, Node: Match-syntactic-class Operator, Next: Match-not-syntactic-class Operator, Prev: Emacs Syntax Tables, Up: Syntactic Class Operators - -The Match-syntactic-class Operator (`\s'CLASS) ----------------------------------------------- - - This operator matches any character whose syntactic class is -represented by a specified character. `\sCLASS' represents this -operator where CLASS is the character representing the syntactic class -you want. For example, `w' represents the syntactic class of -word-constituent characters, so `\sw' matches any word-constituent -character. 
- - -File: regex.info, Node: Match-not-syntactic-class Operator, Prev: Match-syntactic-class Operator, Up: Syntactic Class Operators - -The Match-not-syntactic-class Operator (`\S'CLASS) --------------------------------------------------- - - This operator is similar to the match-syntactic-class operator except -that it matches any character whose syntactic class is *not* -represented by the specified character. `\SCLASS' represents this -operator. For example, `w' represents the syntactic class of -word-constituent characters, so `\Sw' matches any character that is not -word-constituent. - - -File: regex.info, Node: What Gets Matched?, Next: Programming with Regex, Prev: GNU Emacs Operators, Up: Top - -What Gets Matched? -****************** - - Regex usually matches strings according to the "leftmost longest" -rule; that is, it chooses the longest of the leftmost matches. This -does not mean that for a regular expression containing subexpressions -that it simply chooses the longest match for each subexpression, left to -right; the overall match must also be the longest possible one. - - For example, `(ac*)(c*d[ac]*)\1' matches `acdacaaa', not `acdac', as -it would if it were to choose the longest match for the first -subexpression. - - -File: regex.info, Node: Programming with Regex, Next: Copying, Prev: What Gets Matched?, Up: Top - -Programming with Regex -********************** - - Here we describe how you use the Regex data structures and functions -in C programs. Regex has three interfaces: one designed for GNU, one -compatible with POSIX and one compatible with Berkeley UNIX. - -* Menu: - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - - -File: regex.info, Node: GNU Regex Functions, Next: POSIX Regex Functions, Up: Programming with Regex - -GNU Regex Functions -=================== - - If you're writing code that doesn't need to be compatible with either -POSIX or Berkeley UNIX, you can use these functions. 
They provide more -options than the other interfaces. - -* Menu: - -* GNU Pattern Buffers:: The re_pattern_buffer type. -* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - - -File: regex.info, Node: GNU Pattern Buffers, Next: GNU Regular Expression Compiling, Up: GNU Regex Functions - -GNU Pattern Buffers -------------------- - - To compile, match, or search for a given regular expression, you must -supply a pattern buffer. A "pattern buffer" holds one compiled regular -expression.(1) - - You can have several different pattern buffers simultaneously, each -holding a compiled pattern for a different regular expression. - - `regex.h' defines the pattern buffer `struct' as follows: - - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. 
- Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ - #define REGS_UNALLOCATED 0 - #define REGS_REALLOCATE 1 - #define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - - ---------- Footnotes ---------- - - (1) Regular expressions are also referred to as "patterns," hence -the name "pattern buffer." - - -File: regex.info, Node: GNU Regular Expression Compiling, Next: GNU Matching, Prev: GNU Pattern Buffers, Up: GNU Regex Functions - -GNU Regular Expression Compiling --------------------------------- - - In GNU, you can both match and search for a given regular expression. -To do either, you must first compile it in a pattern buffer (*note GNU -Pattern Buffers::.). - - Regular expressions match according to the syntax with which they were -compiled; with GNU, you indicate what syntax you want by setting the -variable `re_syntax_options' (declared in `regex.h' and defined in -`regex.c') before calling the compiling function, `re_compile_pattern' -(see below). *Note Syntax Bits::, and *Note Predefined Syntaxes::. 
- - You can change the value of `re_syntax_options' at any time. -Usually, however, you set its value once and then never change it. - - `re_compile_pattern' takes a pattern buffer as an argument. You must -initialize the following fields: - -`translate initialization' -`translate' - Initialize this to point to a translate table if you want one, or - to zero if you don't. We explain translate tables in *Note GNU - Translate Tables::. - -`fastmap' - Initialize this to nonzero if you want a fastmap, or to zero if you - don't. - -`buffer' -`allocated' - If you want `re_compile_pattern' to allocate memory for the - compiled pattern, set both of these to zero. If you have an - existing block of memory (allocated with `malloc') you want Regex - to use, set `buffer' to its address and `allocated' to its size (in - bytes). - - `re_compile_pattern' uses `realloc' to extend the space for the - compiled pattern as necessary. - - To compile a pattern buffer, use: - - char * - re_compile_pattern (const char *REGEX, const int REGEX_SIZE, - struct re_pattern_buffer *PATTERN_BUFFER) - -REGEX is the regular expression's address, REGEX_SIZE is its length, -and PATTERN_BUFFER is the pattern buffer's address. - - If `re_compile_pattern' successfully compiles the regular expression, -it returns zero and sets `*PATTERN_BUFFER' to the compiled pattern. It -sets the pattern buffer's fields as follows: - -`buffer' - to the compiled pattern. - -`used' - to the number of bytes the compiled pattern in `buffer' occupies. - -`syntax' - to the current value of `re_syntax_options'. - -`re_nsub' - to the number of subexpressions in REGEX. - -`fastmap_accurate' - to zero on the theory that the pattern you're compiling is - different than the one previously compiled into `buffer'; in that - case (since you can't make a fastmap without a compiled pattern), - `fastmap' would either contain an incompatible fastmap, or nothing - at all. 
- - If `re_compile_pattern' can't compile REGEX, it returns an error -string corresponding to one of the errors listed in *Note POSIX Regular -Expression Compiling::. - - -File: regex.info, Node: GNU Matching, Next: GNU Searching, Prev: GNU Regular Expression Compiling, Up: GNU Regex Functions - -GNU Matching ------------- - - Matching the GNU way means trying to match as much of a string as -possible starting at a position within it you specify. Once you've -compiled a pattern into a pattern buffer (*note GNU Regular Expression -Compiling::.), you can ask the matcher to match that pattern against a -string using: - - int - re_match (struct re_pattern_buffer *PATTERN_BUFFER, - const char *STRING, const int SIZE, - const int START, struct re_registers *REGS) - -PATTERN_BUFFER is the address of a pattern buffer containing a compiled -pattern. STRING is the string you want to match; it can contain -newline and null characters. SIZE is the length of that string. START -is the string index at which you want to begin matching; the first -character of STRING is at index zero. *Note Using Registers::, for an -explanation of REGS; you can safely pass zero. - - `re_match' matches the regular expression in PATTERN_BUFFER against -the string STRING according to the syntax in PATTERN_BUFFER's `syntax' -field. (*Note GNU Regular Expression Compiling::, for how to set it.) -The function returns -1 if the compiled pattern does not match any part -of STRING and -2 if an internal error happens; otherwise, it returns -how many (possibly zero) characters of STRING the pattern matched. - - An example: suppose PATTERN_BUFFER points to a pattern buffer -containing the compiled pattern for `a*', and STRING points to `aaaaab' -(whereupon SIZE should be 6). Then if START is 2, `re_match' returns 3, -i.e., `a*' would have matched the last three `a's in STRING. If START -is 0, `re_match' returns 5, i.e., `a*' would have matched all the `a's -in STRING. 
If START is either 5 or 6, it returns zero. - - If START is not between zero and SIZE, then `re_match' returns -1. - - -File: regex.info, Node: GNU Searching, Next: Matching/Searching with Split Data, Prev: GNU Matching, Up: GNU Regex Functions - -GNU Searching -------------- - - "Searching" means trying to match starting at successive positions -within a string. The function `re_search' does this. - - Before calling `re_search', you must compile your regular expression. -*Note GNU Regular Expression Compiling::. - - Here is the function declaration: - - int - re_search (struct re_pattern_buffer *PATTERN_BUFFER, - const char *STRING, const int SIZE, - const int START, const int RANGE, - struct re_registers *REGS) - -whose arguments are the same as those to `re_match' (*note GNU -Matching::.) except that the two arguments START and RANGE replace -`re_match''s argument START. - - If RANGE is positive, then `re_search' attempts a match starting -first at index START, then at START + 1 if that fails, and so on, up to -START + RANGE; if RANGE is negative, then it attempts a match starting -first at index START, then at START -1 if that fails, and so on. - - If START is not between zero and SIZE, then `re_search' returns -1. -When RANGE is positive, `re_search' adjusts RANGE so that START + RANGE -- 1 is between zero and SIZE, if necessary; that way it won't search -outside of STRING. Similarly, when RANGE is negative, `re_search' -adjusts RANGE so that START + RANGE + 1 is between zero and SIZE, if -necessary. - - If the `fastmap' field of PATTERN_BUFFER is zero, `re_search' matches -starting at consecutive positions; otherwise, it uses `fastmap' to make -the search more efficient. *Note Searching with Fastmaps::. - - If no match is found, `re_search' returns -1. If a match is found, -it returns the index where the match began. If an internal error -happens, it returns -2. 
- - -File: regex.info, Node: Matching/Searching with Split Data, Next: Searching with Fastmaps, Prev: GNU Searching, Up: GNU Regex Functions - -Matching and Searching with Split Data --------------------------------------- - - Using the functions `re_match_2' and `re_search_2', you can match or -search in data that is divided into two strings. - - The function: - - int - re_match_2 (struct re_pattern_buffer *BUFFER, - const char *STRING1, const int SIZE1, - const char *STRING2, const int SIZE2, - const int START, - struct re_registers *REGS, - const int STOP) - -is similar to `re_match' (*note GNU Matching::.) except that you pass -*two* data strings and sizes, and an index STOP beyond which you don't -want the matcher to try matching. As with `re_match', if it succeeds, -`re_match_2' returns how many characters of STRING it matched. Regard -STRING1 and STRING2 as concatenated when you set the arguments START and -STOP and use the contents of REGS; `re_match_2' never returns a value -larger than SIZE1 + SIZE2. - - The function: - - int - re_search_2 (struct re_pattern_buffer *BUFFER, - const char *STRING1, const int SIZE1, - const char *STRING2, const int SIZE2, - const int START, const int RANGE, - struct re_registers *REGS, - const int STOP) - -is similarly related to `re_search'. - - -File: regex.info, Node: Searching with Fastmaps, Next: GNU Translate Tables, Prev: Matching/Searching with Split Data, Up: GNU Regex Functions - -Searching with Fastmaps ------------------------ - - If you're searching through a long string, you should use a fastmap. -Without one, the searcher tries to match at consecutive positions in the -string. Generally, most of the characters in the string could not start -a match. It takes much longer to try matching at a given position in -the string than it does to check in a table whether or not the -character at that position could start a match. A "fastmap" is such a -table. 
- - More specifically, a fastmap is an array indexed by the characters in -your character set. Under the ASCII encoding, therefore, a fastmap has -256 elements. If you want the searcher to use a fastmap with a given -pattern buffer, you must allocate the array and assign the array's -address to the pattern buffer's `fastmap' field. You either can -compile the fastmap yourself or have `re_search' do it for you; when -`fastmap' is nonzero, it automatically compiles a fastmap the first -time you search using a particular compiled pattern. - - To compile a fastmap yourself, use: - - int - re_compile_fastmap (struct re_pattern_buffer *PATTERN_BUFFER) - -PATTERN_BUFFER is the address of a pattern buffer. If the character C -could start a match for the pattern, `re_compile_fastmap' makes -`PATTERN_BUFFER->fastmap[C]' nonzero. It returns 0 if it can compile a -fastmap and -2 if there is an internal error. For example, if `|' is -the alternation operator and PATTERN_BUFFER holds the compiled pattern -for `a|b', then `re_compile_fastmap' sets `fastmap['a']' and -`fastmap['b']' (and no others). - - `re_search' uses a fastmap as it moves along in the string: it checks -the string's characters until it finds one that's in the fastmap. Then -it tries matching at that character. If the match fails, it repeats -the process. So, by using a fastmap, `re_search' doesn't waste time -trying to match at positions in the string that couldn't start a match. - - If you don't want `re_search' to use a fastmap, store zero in the -`fastmap' field of the pattern buffer before calling `re_search'. - - Once you've initialized a pattern buffer's `fastmap' field, you need -never do so again--even if you compile a new pattern in it--provided -the way the field is set still reflects whether or not you want a -fastmap. `re_search' will still either do nothing if `fastmap' is null -or, if it isn't, compile a new fastmap for the new pattern. 
- - -File: regex.info, Node: GNU Translate Tables, Next: Using Registers, Prev: Searching with Fastmaps, Up: GNU Regex Functions - -GNU Translate Tables --------------------- - - If you set the `translate' field of a pattern buffer to a translate -table, then the GNU Regex functions to which you've passed that pattern -buffer use it to apply a simple transformation to all the regular -expression and string characters at which they look. - - A "translate table" is an array indexed by the characters in your -character set. Under the ASCII encoding, therefore, a translate table -has 256 elements. The array's elements are also characters in your -character set. When the Regex functions see a character C, they use -`translate[C]' in its place, with one exception: the character after a -`\' is not translated. (This ensures that, the operators, e.g., `\B' -and `\b', are always distinguishable.) - - For example, a table that maps all lowercase letters to the -corresponding uppercase ones would cause the matcher to ignore -differences in case.(1) Such a table would map all characters except -lowercase letters to themselves, and lowercase letters to the -corresponding uppercase ones. Under the ASCII encoding, here's how you -could initialize such a table (we'll call it `case_fold'): - - for (i = 0; i < 256; i++) - case_fold[i] = i; - for (i = 'a'; i <= 'z'; i++) - case_fold[i] = i - ('a' - 'A'); - - You tell Regex to use a translate table on a given pattern buffer by -assigning that table's address to the `translate' field of that buffer. -If you don't want Regex to do any translation, put zero into this -field. You'll get weird results if you change the table's contents -anytime between compiling the pattern buffer, compiling its fastmap, and -matching or searching with the pattern buffer. - - ---------- Footnotes ---------- - - (1) A table that maps all uppercase letters to the corresponding -lowercase ones would work just as well for this purpose. 
- - -File: regex.info, Node: Using Registers, Next: Freeing GNU Pattern Buffers, Prev: GNU Translate Tables, Up: GNU Regex Functions - -Using Registers ---------------- - - A group in a regular expression can match a (possibly empty) -substring of the string that the regular expression as a whole matched. -The matcher remembers the beginning and end of the substring matched by -each group. - - To find out what they matched, pass a nonzero REGS argument to a GNU -matching or searching function (*note GNU Matching::. and *Note GNU -Searching::), i.e., the address of a structure of this type, as defined -in `regex.h': - - struct re_registers - { - unsigned num_regs; - regoff_t *start; - regoff_t *end; - }; - - Except for (possibly) the NUM_REGS'th element (see below), the Ith -element of the `start' and `end' arrays records information about the -Ith group in the pattern. (They're declared as C pointers, but this is -only because not all C compilers accept zero-length arrays; -conceptually, it is simplest to think of them as arrays.) - - The `start' and `end' arrays are allocated in various ways, depending -on the value of the `regs_allocated' field in the pattern buffer passed -to the matcher. - - The simplest and perhaps most useful is to let the matcher -(re)allocate enough space to record information for all the groups in -the regular expression. If `regs_allocated' is `REGS_UNALLOCATED', the -matcher allocates 1 + RE_NSUB (another field in the pattern buffer; -*note GNU Pattern Buffers::.). The matcher sets the extra element to -1, and -sets `regs_allocated' to `REGS_REALLOCATE'. Then on subsequent calls -with the same pattern buffer and REGS arguments, the matcher -reallocates more space if necessary. - - It would perhaps be more logical to make the `regs_allocated' field -part of the `re_registers' structure, instead of part of the pattern -buffer. But in that case the caller would be forced to initialize the -structure before passing it. 
Much existing code doesn't do this -initialization, and it's arguably better to avoid it anyway. - - `re_compile_pattern' sets `regs_allocated' to `REGS_UNALLOCATED', so -if you use the GNU regular expression functions, you get this behavior -by default. - - xx document re_set_registers - - POSIX, on the other hand, requires a different interface: the caller -is supposed to pass in a fixed-length array which the matcher fills. -Therefore, if `regs_allocated' is `REGS_FIXED' the matcher simply fills -that array. - - The following examples illustrate the information recorded in the -`re_registers' structure. (In all of them, `(' represents the -open-group and `)' the close-group operator. The first character in -the string STRING is at index 0.) - - * If the regular expression has an I-th group not contained within - another group that matches a substring of STRING, then the - function sets `REGS->start[I]' to the index in STRING where the - substring matched by the I-th group begins, and `REGS->end[I]' to - the index just beyond that substring's end. The function sets - `REGS->start[0]' and `REGS->end[0]' to analogous information about - the entire pattern. - - For example, when you match `((a)(b))' against `ab', you get: - - * 0 in `REGS->start[0]' and 2 in `REGS->end[0]' - - * 0 in `REGS->start[1]' and 2 in `REGS->end[1]' - - * 0 in `REGS->start[2]' and 1 in `REGS->end[2]' - - * 1 in `REGS->start[3]' and 2 in `REGS->end[3]' - - * If a group matches more than once (as it might if followed by, - e.g., a repetition operator), then the function reports the - information about what the group *last* matched. 
- - For example, when you match the pattern `(a)*' against the string - `aa', you get: - - * 0 in `REGS->start[0]' and 2 in `REGS->end[0]' - - * 1 in `REGS->start[1]' and 2 in `REGS->end[1]' - - * If the I-th group does not participate in a successful match, - e.g., it is an alternative not taken or a repetition operator - allows zero repetitions of it, then the function sets - `REGS->start[I]' and `REGS->end[I]' to -1. - - For example, when you match the pattern `(a)*b' against the string - `b', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * -1 in `REGS->start[1]' and -1 in `REGS->end[1]' - - * If the I-th group matches a zero-length string, then the function - sets `REGS->start[I]' and `REGS->end[I]' to the index just beyond - that zero-length string. - - For example, when you match the pattern `(a*)b' against the string - `b', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * 0 in `REGS->start[1]' and 0 in `REGS->end[1]' - - * If an I-th group contains a J-th group in turn not contained - within any other group within group I and the function reports a - match of the I-th group, then it records in `REGS->start[J]' and - `REGS->end[J]' the last match (if it matched) of the J-th group. 
- - For example, when you match the pattern `((a*)b)*' against the - string `abb', group 2 last matches the empty string, so you get - what it previously matched: - - * 0 in `REGS->start[0]' and 3 in `REGS->end[0]' - - * 2 in `REGS->start[1]' and 3 in `REGS->end[1]' - - * 2 in `REGS->start[2]' and 2 in `REGS->end[2]' - - When you match the pattern `((a)*b)*' against the string `abb', - group 2 doesn't participate in the last match, so you get: - - * 0 in `REGS->start[0]' and 3 in `REGS->end[0]' - - * 2 in `REGS->start[1]' and 3 in `REGS->end[1]' - - * 0 in `REGS->start[2]' and 1 in `REGS->end[2]' - - * If an I-th group contains a J-th group in turn not contained - within any other group within group I and the function sets - `REGS->start[I]' and `REGS->end[I]' to -1, then it also sets - `REGS->start[J]' and `REGS->end[J]' to -1. - - For example, when you match the pattern `((a)*b)*c' against the - string `c', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * -1 in `REGS->start[1]' and -1 in `REGS->end[1]' - - * -1 in `REGS->start[2]' and -1 in `REGS->end[2]' - - -File: regex.info, Node: Freeing GNU Pattern Buffers, Prev: Using Registers, Up: GNU Regex Functions - -Freeing GNU Pattern Buffers ---------------------------- - - To free any allocated fields of a pattern buffer, you can use the -POSIX function described in *Note Freeing POSIX Pattern Buffers::, -since the type `regex_t'--the type for POSIX pattern buffers--is -equivalent to the type `re_pattern_buffer'. After freeing a pattern -buffer, you need to again compile a regular expression in it (*note GNU -Regular Expression Compiling::.) before passing it to a matching or -searching function. - - -File: regex.info, Node: POSIX Regex Functions, Next: BSD Regex Functions, Prev: GNU Regex Functions, Up: Programming with Regex - -POSIX Regex Functions -===================== - - If you're writing code that has to be POSIX compatible, you'll need -to use these functions. 
Their interfaces are as specified by POSIX, -draft 1003.2/D11.2. - -* Menu: - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () - - -File: regex.info, Node: POSIX Pattern Buffers, Next: POSIX Regular Expression Compiling, Up: POSIX Regex Functions - -POSIX Pattern Buffers ---------------------- - - To compile or match a given regular expression the POSIX way, you -must supply a pattern buffer exactly the way you do for GNU (*note GNU -Pattern Buffers::.). POSIX pattern buffers have type `regex_t', which -is equivalent to the GNU pattern buffer type `re_pattern_buffer'. - - -File: regex.info, Node: POSIX Regular Expression Compiling, Next: POSIX Matching, Prev: POSIX Pattern Buffers, Up: POSIX Regex Functions - -POSIX Regular Expression Compiling ----------------------------------- - - With POSIX, you can only search for a given regular expression; you -can't match it. To do this, you must first compile it in a pattern -buffer, using `regcomp'. - - To compile a pattern buffer, use: - - int - regcomp (regex_t *PREG, const char *REGEX, int CFLAGS) - -PREG is the initialized pattern buffer's address, REGEX is the regular -expression's address, and CFLAGS is the compilation flags, which Regex -considers as a collection of bits. Here are the valid bits, as defined -in `regex.h': - -`REG_EXTENDED' - says to use POSIX Extended Regular Expression syntax; if this isn't - set, then says to use POSIX Basic Regular Expression syntax. - `regcomp' sets PREG's `syntax' field accordingly. - -`REG_ICASE' - says to ignore case; `regcomp' sets PREG's `translate' field to a - translate table which ignores case, replacing anything you've put - there before. - -`REG_NOSUB' - says to set PREG's `no_sub' field; *note POSIX Matching::., for - what this means. 
- -`REG_NEWLINE' - says that a: - - * match-any-character operator (*note Match-any-character - Operator::.) doesn't match a newline. - - * nonmatching list not containing a newline (*note List - Operators::.) doesn't match a newline. - - * match-beginning-of-line operator (*note - Match-beginning-of-line Operator::.) matches the empty string - immediately after a newline, regardless of how `REG_NOTBOL' - is set (*note POSIX Matching::., for an explanation of - `REG_NOTBOL'). - - * match-end-of-line operator (*note Match-end-of-line - Operator::.) matches the empty string immediately before a - newline, regardless of how `REG_NOTEOL' is set (*note POSIX - Matching::., for an explanation of `REG_NOTEOL'). - - If `regcomp' successfully compiles the regular expression, it returns -zero and sets `*PREG' to the compiled pattern. Except for -`syntax' (which it sets as explained above), it also sets the same -fields the same way as does the GNU compiling function (*note GNU -Regular Expression Compiling::.). - - If `regcomp' can't compile the regular expression, it returns one of -the error codes listed here. (Except when noted differently, the -syntax in all examples below is basic regular expression syntax.) - -`REG_BADRPT' - For example, the consecutive repetition operators `**' in `a**' - are invalid. As another example, if the syntax is extended - regular expression syntax, then the repetition operator `*' with - nothing on which to operate in `*' is invalid. - -`REG_BADBR' - For example, the COUNT `-1' in `a\{-1' is invalid. - -`REG_EBRACE' - For example, `a\{1' is missing a close-interval operator. - -`REG_EBRACK' - For example, `[a' is missing a close-list operator. - -`REG_ERANGE' - For example, the range ending point `z' that collates lower than - does its starting point `a' in `[z-a]' is invalid. Also, the - range with the character class `[:alpha:]' as its starting point in - `[[:alpha:]-|]' is invalid. 
- -`REG_ECTYPE' - For example, the character class name `foo' in `[[:foo:]' is - invalid. - -`REG_EPAREN' - For example, `a\)' is missing an open-group operator and `\(a' is - missing a close-group operator. - -`REG_ESUBREG' - For example, the back reference `\2' that refers to a nonexistent - subexpression in `\(a\)\2' is invalid. - -`REG_EEND' - Returned when a regular expression causes no other more specific - error. - -`REG_EESCAPE' - For example, the trailing backslash `\' in `a\' is invalid, as is - the one in `\'. - -`REG_BADPAT' - For example, in the extended regular expression syntax, the empty - group `()' in `a()b' is invalid. - -`REG_ESIZE' - Returned when a regular expression needs a pattern buffer larger - than 65536 bytes. - -`REG_ESPACE' - Returned when a regular expression causes Regex to run out of - memory. - - -File: regex.info, Node: POSIX Matching, Next: Reporting Errors, Prev: POSIX Regular Expression Compiling, Up: POSIX Regex Functions - -POSIX Matching --------------- - - Matching the POSIX way means trying to match a null-terminated string -starting at its first character. Once you've compiled a pattern into a -pattern buffer (*note POSIX Regular Expression Compiling::.), you can -ask the matcher to match that pattern against a string using: - - int - regexec (const regex_t *PREG, const char *STRING, - size_t NMATCH, regmatch_t PMATCH[], int EFLAGS) - -PREG is the address of a pattern buffer for a compiled pattern. STRING -is the string you want to match. - - *Note Using Byte Offsets::, for an explanation of PMATCH. If you -pass zero for NMATCH or you compiled PREG with the compilation flag -`REG_NOSUB' set, then `regexec' will ignore PMATCH; otherwise, you must -allocate it to have at least NMATCH elements. `regexec' will record -NMATCH byte offsets in PMATCH, and set to -1 any unused elements up to -PMATCH`[NMATCH - 1]'. - - EFLAGS specifies "execution flags"--namely, the two bits `REG_NOTBOL' -and `REG_NOTEOL' (defined in `regex.h'). 
If you set `REG_NOTBOL', then -the match-beginning-of-line operator (*note Match-beginning-of-line -Operator::.) always fails to match. This lets you match against pieces -of a line, as you would need to if, say, searching for repeated -instances of a given pattern in a line; it would work correctly for -patterns both with and without match-beginning-of-line operators. -`REG_NOTEOL' works analogously for the match-end-of-line operator -(*note Match-end-of-line Operator::.); it exists for symmetry. - - `regexec' tries to find a match for PREG in STRING according to the -syntax in PREG's `syntax' field. (*Note POSIX Regular Expression -Compiling::, for how to set it.) The function returns zero if the -compiled pattern matches STRING and `REG_NOMATCH' (defined in -`regex.h') if it doesn't. - - -File: regex.info, Node: Reporting Errors, Next: Using Byte Offsets, Prev: POSIX Matching, Up: POSIX Regex Functions - -Reporting Errors ----------------- - - If either `regcomp' or `regexec' fails, it returns a nonzero error -code, the possibilities for which are defined in `regex.h'. *Note -POSIX Regular Expression Compiling::, and *Note POSIX Matching::, for -what these codes mean. To get an error string corresponding to these -codes, you can use: - - size_t - regerror (int ERRCODE, - const regex_t *PREG, - char *ERRBUF, - size_t ERRBUF_SIZE) - -ERRCODE is an error code, PREG is the address of the pattern buffer -which provoked the error, ERRBUF is the error buffer, and ERRBUF_SIZE -is ERRBUF's size. - - `regerror' returns the size in bytes of the error string -corresponding to ERRCODE (including its terminating null). If ERRBUF -and ERRBUF_SIZE are nonzero, it also returns in ERRBUF the first -ERRBUF_SIZE - 1 characters of the error string, followed by a null. -ERRBUF_SIZE must be a nonnegative number less than or equal to the size -in bytes of ERRBUF. 
- - You can call `regerror' with a null ERRBUF and a zero ERRBUF_SIZE to -determine how large ERRBUF need be to accommodate `regerror''s error -string. - - -File: regex.info, Node: Using Byte Offsets, Next: Freeing POSIX Pattern Buffers, Prev: Reporting Errors, Up: POSIX Regex Functions - -Using Byte Offsets ------------------- - - In POSIX, variables of type `regmatch_t' hold information analogous to, -but not identical to, GNU's registers (*note Using Registers::.). -To get information about registers in POSIX, pass to `regexec' a -nonzero PMATCH of type `regmatch_t', i.e., the address of a structure -of this type, defined in `regex.h': - - typedef struct - { - regoff_t rm_so; - regoff_t rm_eo; - } regmatch_t; - - When reading in *Note Using Registers::, about how the matching -function stores the information into the registers, substitute PMATCH -for REGS, `PMATCH[I].rm_so' for `REGS->start[I]' and -`PMATCH[I].rm_eo' for `REGS->end[I]'. - - -File: regex.info, Node: Freeing POSIX Pattern Buffers, Prev: Using Byte Offsets, Up: POSIX Regex Functions - -Freeing POSIX Pattern Buffers ------------------------------ - - To free any allocated fields of a pattern buffer, use: - - void - regfree (regex_t *PREG) - -PREG is the pattern buffer whose allocated fields you want freed. -`regfree' also sets PREG's `allocated' and `used' fields to zero. -After freeing a pattern buffer, you need to again compile a regular -expression in it (*note POSIX Regular Expression Compiling::.) before -passing it to the matching function (*note POSIX Matching::.). - - -File: regex.info, Node: BSD Regex Functions, Prev: POSIX Regex Functions, Up: Programming with Regex - -BSD Regex Functions -=================== - - If you're writing code that has to be Berkeley UNIX compatible, -you'll need to use these functions whose interfaces are the same as -those in Berkeley UNIX. 
- -* Menu: - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () - - -File: regex.info, Node: BSD Regular Expression Compiling, Next: BSD Searching, Up: BSD Regex Functions - -BSD Regular Expression Compiling --------------------------------- - - With Berkeley UNIX, you can only search for a given regular -expression; you can't match one. To search for it, you must first -compile it. Before you compile it, you must indicate the regular -expression syntax you want it compiled according to by setting the -variable `re_syntax_options' (declared in `regex.h') to some syntax -(*note Regular Expression Syntax::.). - - To compile a regular expression use: - - char * - re_comp (char *REGEX) - -REGEX is the address of a null-terminated regular expression. -`re_comp' uses an internal pattern buffer, so you can use only the most -recently compiled pattern buffer. This means that if you want to use a -given regular expression that you've already compiled--but it isn't the -latest one you've compiled--you'll have to recompile it. If you call -`re_comp' with the null string (*not* the empty string) as the -argument, it doesn't change the contents of the pattern buffer. - - If `re_comp' successfully compiles the regular expression, it returns -zero. If it can't compile the regular expression, it returns an error -string. `re_comp''s error messages are identical to those of -`re_compile_pattern' (*note GNU Regular Expression Compiling::.). - - -File: regex.info, Node: BSD Searching, Prev: BSD Regular Expression Compiling, Up: BSD Regex Functions - -BSD Searching -------------- - - Searching the Berkeley UNIX way means searching in a string starting -at its first character and trying successive positions within it to -find a match. 
Once you've compiled a pattern using `re_comp' (*note -BSD Regular Expression Compiling::.), you can ask Regex to search for -that pattern in a string using: - - int - re_exec (char *STRING) - -STRING is the address of the null-terminated string in which you want -to search. - - `re_exec' returns either 1 for success or 0 for failure. It -automatically uses a GNU fastmap (*note Searching with Fastmaps::.). - - -File: regex.info, Node: Copying, Next: Index, Prev: Programming with Regex, Up: Top - -GNU GENERAL PUBLIC LICENSE -************************** - - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -Preamble -======== - - The licenses for most software are designed to take away your freedom -to share and change it. By contrast, the GNU General Public License is -intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it in -new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. 
-These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 1. This License applies to any program or other work which contains a - notice placed by the copyright holder saying it may be distributed - under the terms of this General Public License. The "Program", - below, refers to any such program or work, and a "work based on - the Program" means either the Program or any derivative work under - copyright law: that is to say, a work containing the Program or a - portion of it, either verbatim or with modifications and/or - translated into another language. 
(Hereinafter, translation is - included without limitation in the term "modification".) Each - licensee is addressed as "you". - - Activities other than copying, distribution and modification are - not covered by this License; they are outside its scope. The act - of running the Program is not restricted, and the output from the - Program is covered only if its contents constitute a work based on - the Program (independent of having been made by running the - Program). Whether that is true depends on what the Program does. - - 2. You may copy and distribute verbatim copies of the Program's - source code as you receive it, in any medium, provided that you - conspicuously and appropriately publish on each copy an appropriate - copyright notice and disclaimer of warranty; keep intact all the - notices that refer to this License and to the absence of any - warranty; and give any other recipients of the Program a copy of - this License along with the Program. - - You may charge a fee for the physical act of transferring a copy, - and you may at your option offer warranty protection in exchange - for a fee. - - 3. You may modify your copy or copies of the Program or any portion - of it, thus forming a work based on the Program, and copy and - distribute such modifications or work under the terms of Section 1 - above, provided that you also meet all of these conditions: - - a. You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b. You must cause any work that you distribute or publish, that - in whole or in part contains or is derived from the Program - or any part thereof, to be licensed as a whole at no charge - to all third parties under the terms of this License. - - c. 
If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display - an announcement including an appropriate copyright notice and - a notice that there is no warranty (or else, saying that you - provide a warranty) and that users may redistribute the - program under these conditions, and telling the user how to - view a copy of this License. (Exception: if the Program - itself is interactive but does not normally print such an - announcement, your work based on the Program is not required - to print an announcement.) - - These requirements apply to the modified work as a whole. If - identifiable sections of that work are not derived from the - Program, and can be reasonably considered independent and separate - works in themselves, then this License, and its terms, do not - apply to those sections when you distribute them as separate - works. But when you distribute the same sections as part of a - whole which is a work based on the Program, the distribution of - the whole must be on the terms of this License, whose permissions - for other licensees extend to the entire whole, and thus to each - and every part regardless of who wrote it. - - Thus, it is not the intent of this section to claim rights or - contest your rights to work written entirely by you; rather, the - intent is to exercise the right to control the distribution of - derivative or collective works based on the Program. - - In addition, mere aggregation of another work not based on the - Program with the Program (or with a work based on the Program) on - a volume of a storage or distribution medium does not bring the - other work under the scope of this License. - - 4. 
You may copy and distribute the Program (or a work based on it, - under Section 2) in object code or executable form under the terms - of Sections 1 and 2 above provided that you also do one of the - following: - - a. Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of - Sections 1 and 2 above on a medium customarily used for - software interchange; or, - - b. Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a - medium customarily used for software interchange; or, - - c. Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with - such an offer, in accord with Subsection b above.) - - The source code for a work means the preferred form of the work for - making modifications to it. For an executable work, complete - source code means all the source code for all modules it contains, - plus any associated interface definition files, plus the scripts - used to control compilation and installation of the executable. - However, as a special exception, the source code distributed need - not include anything that is normally distributed (in either - source or binary form) with the major components (compiler, - kernel, and so on) of the operating system on which the executable - runs, unless that component itself accompanies the executable. 
- - If distribution of executable or object code is made by offering - access to copy from a designated place, then offering equivalent - access to copy the source code from the same place counts as - distribution of the source code, even though third parties are not - compelled to copy the source along with the object code. - - 5. You may not copy, modify, sublicense, or distribute the Program - except as expressly provided under this License. Any attempt - otherwise to copy, modify, sublicense or distribute the Program is - void, and will automatically terminate your rights under this - License. However, parties who have received copies, or rights, - from you under this License will not have their licenses - terminated so long as such parties remain in full compliance. - - 6. You are not required to accept this License, since you have not - signed it. However, nothing else grants you permission to modify - or distribute the Program or its derivative works. These actions - are prohibited by law if you do not accept this License. - Therefore, by modifying or distributing the Program (or any work - based on the Program), you indicate your acceptance of this - License to do so, and all its terms and conditions for copying, - distributing or modifying the Program or works based on it. - - 7. Each time you redistribute the Program (or any work based on the - Program), the recipient automatically receives a license from the - original licensor to copy, distribute or modify the Program - subject to these terms and conditions. You may not impose any - further restrictions on the recipients' exercise of the rights - granted herein. You are not responsible for enforcing compliance - by third parties to this License. - - 8. 
If, as a consequence of a court judgment or allegation of patent - infringement or for any other reason (not limited to patent - issues), conditions are imposed on you (whether by court order, - agreement or otherwise) that contradict the conditions of this - License, they do not excuse you from the conditions of this - License. If you cannot distribute so as to satisfy simultaneously - your obligations under this License and any other pertinent - obligations, then as a consequence you may not distribute the - Program at all. For example, if a patent license would not permit - royalty-free redistribution of the Program by all those who - receive copies directly or indirectly through you, then the only - way you could satisfy both it and this License would be to refrain - entirely from distribution of the Program. - - If any portion of this section is held invalid or unenforceable - under any particular circumstance, the balance of the section is - intended to apply and the section as a whole is intended to apply - in other circumstances. - - It is not the purpose of this section to induce you to infringe any - patents or other property right claims or to contest validity of - any such claims; this section has the sole purpose of protecting - the integrity of the free software distribution system, which is - implemented by public license practices. Many people have made - generous contributions to the wide range of software distributed - through that system in reliance on consistent application of that - system; it is up to the author/donor to decide if he or she is - willing to distribute software through any other system and a - licensee cannot impose that choice. - - This section is intended to make thoroughly clear what is believed - to be a consequence of the rest of this License. - - 9. 
If the distribution and/or use of the Program is restricted in - certain countries either by patents or by copyrighted interfaces, - the original copyright holder who places the Program under this - License may add an explicit geographical distribution limitation - excluding those countries, so that distribution is permitted only - in or among countries not thus excluded. In such case, this - License incorporates the limitation as if written in the body of - this License. - - 10. The Free Software Foundation may publish revised and/or new - versions of the General Public License from time to time. Such - new versions will be similar in spirit to the present version, but - may differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the - Program specifies a version number of this License which applies - to it and "any later version", you have the option of following - the terms and conditions either of that version or of any later - version published by the Free Software Foundation. If the Program - does not specify a version number of this License, you may choose - any version ever published by the Free Software Foundation. - - 11. If you wish to incorporate parts of the Program into other free - programs whose distribution conditions are different, write to the - author to ask for permission. For software which is copyrighted - by the Free Software Foundation, write to the Free Software - Foundation; we sometimes make exceptions for this. Our decision - will be guided by the two goals of preserving the free status of - all derivatives of our free software and of promoting the sharing - and reuse of software generally. - - NO WARRANTY - - 12. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO - WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE - LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT - HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT - WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT - NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE - QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE - PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY - SERVICING, REPAIR OR CORRECTION. - - 13. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN - WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY - MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE - LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, - INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR - INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF - DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU - OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY - OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - -Appendix: How to Apply These Terms to Your New Programs -======================================================= - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. 
- Copyright (C) 19YY NAME OF AUTHOR - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Also add information on how to contact you by electronic and paper -mail. - - If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - - The hypothetical commands `show w' and `show c' should show the -appropriate parts of the General Public License. Of course, the -commands you use may be called something other than `show w' and `show -c'; they could even be mouse-clicks or menu items--whatever suits your -program. - - You should also get your employer (if you work as a programmer) or -your school, if any, to sign a "copyright disclaimer" for the program, -if necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - SIGNATURE OF TY COON, 1 April 1989 - Ty Coon, President of Vice - - This General Public License does not permit incorporating your -program into proprietary programs. 
If your program is a subroutine -library, you may consider it more useful to permit linking proprietary -applications with the library. If this is what you want to do, use the -GNU Library General Public License instead of this License. - - -File: regex.info, Node: Index, Prev: Copying, Up: Top - -Index -***** - -* Menu: - -* $: Match-end-of-line Operator. -* (: Grouping Operators. -* ): Grouping Operators. -* *: Match-zero-or-more Operator. -* +: Match-one-or-more Operator. -* -: List Operators. -* .: Match-any-character Operator. -* :] in regex: Character Class Operators. -* ?: Match-zero-or-one Operator. -* {: Interval Operators. -* }: Interval Operators. -* [: in regex: Character Class Operators. -* [^: List Operators. -* [: List Operators. -* \': Match-end-of-buffer Operator. -* \<: Match-beginning-of-word Operator. -* \>: Match-end-of-word Operator. -* \{: Interval Operators. -* \}: Interval Operators. -* \b: Match-word-boundary Operator. -* \B: Match-within-word Operator. -* \s: Match-syntactic-class Operator. -* \S: Match-not-syntactic-class Operator. -* \w: Match-word-constituent Operator. -* \W: Match-non-word-constituent Operator. -* \`: Match-beginning-of-buffer Operator. -* \: List Operators. -* ]: List Operators. -* ^: List Operators. -* allocated initialization: GNU Regular Expression Compiling. -* alternation operator: Alternation Operator. -* alternation operator and ^: Match-beginning-of-line Operator. -* anchoring: Anchoring Operators. -* anchors: Match-end-of-line Operator. -* anchors: Match-beginning-of-line Operator. -* Awk: Predefined Syntaxes. -* back references: Back-reference Operator. -* backtracking: Match-zero-or-more Operator. -* backtracking: Alternation Operator. -* beginning-of-line operator: Match-beginning-of-line Operator. -* bracket expression: List Operators. -* buffer field, set by re_compile_pattern: GNU Regular Expression Compiling. -* buffer initialization: GNU Regular Expression Compiling. 
-* character classes: Character Class Operators. -* Egrep: Predefined Syntaxes. -* Emacs: Predefined Syntaxes. -* end in struct re_registers: Using Registers. -* end-of-line operator: Match-end-of-line Operator. -* fastmap initialization: GNU Regular Expression Compiling. -* fastmaps: Searching with Fastmaps. -* fastmap_accurate field, set by re_compile_pattern: GNU Regular Expression Compiling. -* Grep: Predefined Syntaxes. -* grouping: Grouping Operators. -* ignoring case: POSIX Regular Expression Compiling. -* interval expression: Interval Operators. -* matching list: List Operators. -* matching newline: List Operators. -* matching with GNU functions: GNU Matching. -* newline_anchor field in pattern buffer: Match-beginning-of-line Operator. -* nonmatching list: List Operators. -* not_bol field in pattern buffer: Match-beginning-of-line Operator. -* num_regs in struct re_registers: Using Registers. -* open-group operator and ^: Match-beginning-of-line Operator. -* or operator: Alternation Operator. -* parenthesizing: Grouping Operators. -* pattern buffer initialization: GNU Regular Expression Compiling. -* pattern buffer, definition of: GNU Pattern Buffers. -* POSIX Awk: Predefined Syntaxes. -* range argument to re_search: GNU Searching. -* regex.c: Overview. -* regex.h: Overview. -* regexp anchoring: Anchoring Operators. -* regmatch_t: Using Byte Offsets. -* regs_allocated: Using Registers. -* REGS_FIXED: Using Registers. -* REGS_REALLOCATE: Using Registers. -* REGS_UNALLOCATED: Using Registers. -* regular expressions, syntax of: Regular Expression Syntax. -* REG_EXTENDED: POSIX Regular Expression Compiling. -* REG_ICASE: POSIX Regular Expression Compiling. -* REG_NEWLINE: POSIX Regular Expression Compiling. -* REG_NOSUB: POSIX Regular Expression Compiling. -* RE_BACKSLASH_ESCAPE_IN_LIST: Syntax Bits. -* RE_BK_PLUS_QM: Syntax Bits. -* RE_CHAR_CLASSES: Syntax Bits. -* RE_CONTEXT_INDEP_ANCHORS: Syntax Bits. 
-* RE_CONTEXT_INDEP_ANCHORS (and ^): Match-beginning-of-line Operator. -* RE_CONTEXT_INDEP_OPS: Syntax Bits. -* RE_CONTEXT_INVALID_OPS: Syntax Bits. -* RE_DOT_NEWLINE: Syntax Bits. -* RE_DOT_NOT_NULL: Syntax Bits. -* RE_INTERVALS: Syntax Bits. -* RE_LIMITED_OPS: Syntax Bits. -* RE_NEWLINE_ALT: Syntax Bits. -* RE_NO_BK_BRACES: Syntax Bits. -* RE_NO_BK_PARENS: Syntax Bits. -* RE_NO_BK_REFS: Syntax Bits. -* RE_NO_BK_VBAR: Syntax Bits. -* RE_NO_EMPTY_RANGES: Syntax Bits. -* re_nsub field, set by re_compile_pattern: GNU Regular Expression Compiling. -* re_pattern_buffer definition: GNU Pattern Buffers. -* re_registers: Using Registers. -* re_syntax_options initialization: GNU Regular Expression Compiling. -* RE_UNMATCHED_RIGHT_PAREN_ORD: Syntax Bits. -* searching with GNU functions: GNU Searching. -* start argument to re_search: GNU Searching. -* start in struct re_registers: Using Registers. -* struct re_pattern_buffer definition: GNU Pattern Buffers. -* subexpressions: Grouping Operators. -* syntax field, set by re_compile_pattern: GNU Regular Expression Compiling. -* syntax bits: Syntax Bits. -* syntax initialization: GNU Regular Expression Compiling. -* syntax of regular expressions: Regular Expression Syntax. -* translate initialization: GNU Regular Expression Compiling. -* used field, set by re_compile_pattern: GNU Regular Expression Compiling. -* word boundaries, matching: Match-word-boundary Operator. -* \: The Backslash Character. -* \(: Grouping Operators. -* \): Grouping Operators. -* \|: Alternation Operator. -* ^: Match-beginning-of-line Operator. -* |: Alternation Operator. - - - -Tag Table: -Node: Top1064 -Node: Overview4562 -Node: Regular Expression Syntax6746 -Node: Syntax Bits7916 -Node: Predefined Syntaxes14018 -Node: Collating Elements vs. 
Characters17872 -Node: The Backslash Character18835 -Node: Common Operators21992 -Node: Match-self Operator23445 -Node: Match-any-character Operator23941 -Node: Concatenation Operator24520 -Node: Repetition Operators25017 -Node: Match-zero-or-more Operator25436 -Node: Match-one-or-more Operator27483 -Node: Match-zero-or-one Operator28341 -Node: Interval Operators29196 -Node: Alternation Operator30991 -Node: List Operators32489 -Node: Character Class Operators35272 -Node: Range Operator36901 -Node: Grouping Operators38930 -Node: Back-reference Operator40251 -Node: Anchoring Operators43073 -Node: Match-beginning-of-line Operator43447 -Node: Match-end-of-line Operator44779 -Node: GNU Operators45518 -Node: Word Operators45767 -Node: Non-Emacs Syntax Tables46391 -Node: Match-word-boundary Operator47465 -Node: Match-within-word Operator47858 -Node: Match-beginning-of-word Operator48255 -Node: Match-end-of-word Operator48588 -Node: Match-word-constituent Operator48908 -Node: Match-non-word-constituent Operator49234 -Node: Buffer Operators49545 -Node: Match-beginning-of-buffer Operator49952 -Node: Match-end-of-buffer Operator50264 -Node: GNU Emacs Operators50558 -Node: Syntactic Class Operators50901 -Node: Emacs Syntax Tables51307 -Node: Match-syntactic-class Operator51963 -Node: Match-not-syntactic-class Operator52560 -Node: What Gets Matched?53150 -Node: Programming with Regex53799 -Node: GNU Regex Functions54237 -Node: GNU Pattern Buffers55078 -Node: GNU Regular Expression Compiling58303 -Node: GNU Matching61181 -Node: GNU Searching63101 -Node: Matching/Searching with Split Data64913 -Node: Searching with Fastmaps66369 -Node: GNU Translate Tables68921 -Node: Using Registers70892 -Node: Freeing GNU Pattern Buffers77000 -Node: POSIX Regex Functions77593 -Node: POSIX Pattern Buffers78266 -Node: POSIX Regular Expression Compiling78709 -Node: POSIX Matching82836 -Node: Reporting Errors84791 -Node: Using Byte Offsets86048 -Node: Freeing POSIX Pattern Buffers86861 -Node: BSD 
Regex Functions87467 -Node: BSD Regular Expression Compiling87886 -Node: BSD Searching89258 -Node: Copying89960 -Node: Index109122 - -End Tag Table diff --git a/regex-0.12/doc/regex.texi b/regex-0.12/doc/regex.texi @@ -1,3138 +0,0 @@ -\input texinfo -@c %**start of header -@setfilename regex.info -@settitle Regex -@c %**end of header - -@c \\{fill-paragraph} works better (for me, anyway) if the text in the -@c source file isn't indented. -@paragraphindent 2 - -@c Define a new index for our magic constants. -@defcodeindex cn - -@c Put everything in one index (arbitrarily chosen to be the concept index). -@syncodeindex cn cp -@syncodeindex ky cp -@syncodeindex pg cp -@syncodeindex tp cp -@syncodeindex vr cp - -@c Here is what we use in the Info `dir' file: -@c * Regex: (regex). Regular expression library. - - -@ifinfo -This file documents the GNU regular expression library. - -Copyright (C) 1992, 1993 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries a copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). -@end ignore - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this one. 
- -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. -@end ifinfo - - -@titlepage - -@title Regex -@subtitle edition 0.12a -@subtitle 19 September 1992 -@author Kathryn A. Hargreaves -@author Karl Berry - -@page - -@vskip 0pt plus 1filll -Copyright @copyright{} 1992 Free Software Foundation. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this -one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. - -@end titlepage - - -@ifinfo -@node Top, Overview, (dir), (dir) -@top Regular Expression Library - -This manual documents how to program with the GNU regular expression -library. This is edition 0.12a of the manual, 19 September 1992. - -The first part of this master menu lists the major nodes in this Info -document, including the index. The rest of the menu lists all the -lower level nodes in the document. 
- -@menu -* Overview:: -* Regular Expression Syntax:: -* Common Operators:: -* GNU Operators:: -* GNU Emacs Operators:: -* What Gets Matched?:: -* Programming with Regex:: -* Copying:: Copying and sharing Regex. -* Index:: General index. - --- The Detailed Node Listing --- - -Regular Expression Syntax - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - -Common Operators - -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - -Repetition Operators - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: @{@} - -List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - -Anchoring Operators - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - -GNU Operators - -* Word Operators:: -* Buffer Operators:: - -Word Operators - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - -Buffer Operators - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - -GNU Emacs Operators - -* Syntactic Class Operators:: - -Syntactic Class Operators - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - -Programming with Regex - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - -GNU Regex Functions - -* GNU Pattern Buffers:: The re_pattern_buffer type. 
-* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - -POSIX Regex Functions - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () - -BSD Regex Functions - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu -@end ifinfo -@node Overview, Regular Expression Syntax, Top, Top -@chapter Overview - -A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text -string that describes some (mathematical) set of strings. A regexp -@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of -strings described by @var{r}. - -Using the Regex library, you can: - -@itemize @bullet - -@item -see if a string matches a specified pattern as a whole, and - -@item -search within a string for a substring matching a specified pattern. - -@end itemize - -Some regular expressions match only one string, i.e., the set they -describe has only one member. For example, the regular expression -@samp{foo} matches the string @samp{foo} and no others. Other regular -expressions match more than one string, i.e., the set they describe has -more than one member. For example, the regular expression @samp{f*} -matches the set of strings made up of any number (including zero) of -@samp{f}s. 
As you can see, some characters in regular expressions match -themselves (such as @samp{f}) and some don't (such as @samp{*}); the -ones that don't match themselves instead let you specify patterns that -describe many different strings. - -To either match or search for a regular expression with the Regex -library functions, you must first compile it with a Regex pattern -compiling function. A @dfn{compiled pattern} is a regular expression -converted to the internal format used by the library functions. Once -you've compiled a pattern, you can use it for matching or searching any -number of times. - -The Regex library consists of two source files: @file{regex.h} and -@file{regex.c}. -@pindex regex.h -@pindex regex.c -Regex provides three groups of functions with which you can operate on -regular expressions. One group---the @sc{gnu} group---is more powerful -but not completely compatible with the other two, namely the @sc{posix} -and Berkeley @sc{unix} groups; its interface was designed specifically -for @sc{gnu}. The other groups have the same interfaces as do the -regular expression functions in @sc{posix} and Berkeley -@sc{unix}. - -We wrote this chapter with programmers in mind, not users of -programs---such as Emacs---that use Regex. We describe the Regex -library in its entirety, not how to write regular expressions that a -particular program understands. - - -@node Regular Expression Syntax, Common Operators, Overview, Top -@chapter Regular Expression Syntax - -@cindex regular expressions, syntax of -@cindex syntax of regular expressions - -@dfn{Characters} are things you can type. @dfn{Operators} are things in -a regular expression that match one or more characters. You compose -regular expressions from operators, which in turn you specify using one -or more characters. - -Most characters represent what we call the match-self operator, i.e., -they match themselves; we call these characters @dfn{ordinary}. 
Other -characters represent either all or parts of fancier operators; e.g., -@samp{.} represents what we call the match-any-character operator -(which, no surprise, matches (almost) any character); we call these -characters @dfn{special}. Two different things determine what -characters represent what operators: - -@enumerate -@item -the regular expression syntax your program has told the Regex library to -recognize, and - -@item -the context of the character in the regular expression. -@end enumerate - -In the following sections, we describe these things in more detail. - -@menu -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: -@end menu - - -@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax -@section Syntax Bits - -@cindex syntax bits - -In any particular syntax for regular expressions, some characters are -always special, others are sometimes special, and others are never -special. The particular syntax that Regex recognizes for a given -regular expression depends on the value in the @code{syntax} field of -the pattern buffer of that regular expression. - -You get a pattern buffer by compiling a regular expression. @xref{GNU -Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information -on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX -Regular Expression Compiling}, and @ref{BSD Regular Expression -Compiling}, for more information on compiling. - -Regex considers the value of the @code{syntax} field to be a collection -of bits; we refer to these bits as @dfn{syntax bits}. In most cases, -they affect what characters represent what operators. We describe the -meanings of the operators to which we refer in @ref{Common Operators}, -@ref{GNU Operators}, and @ref{GNU Emacs Operators}. 
- -For reference, here is the complete list of syntax bits, in alphabetical -order: - -@table @code - -@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS -@item RE_BACKSLASH_ESCAPE_IN_LISTS -If this bit is set, then @samp{\} inside a list (@pxref{List Operators}) -quotes (makes ordinary, if it's special) the following character; if -this bit isn't set, then @samp{\} is an ordinary character inside lists. -(@xref{The Backslash Character}, for what `\' does outside of lists.) - -@cnindex RE_BK_PLUS_QM -@item RE_BK_PLUS_QM -If this bit is set, then @samp{\+} represents the match-one-or-more -operator and @samp{\?} represents the match-zero-or-one operator; if -this bit isn't set, then @samp{+} represents the match-one-or-more -operator and @samp{?} represents the match-zero-or-one operator. This -bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_CHAR_CLASSES -@item RE_CHAR_CLASSES -If this bit is set, then you can use character classes in lists; if this -bit isn't set, then you can't. - -@cnindex RE_CONTEXT_INDEP_ANCHORS -@item RE_CONTEXT_INDEP_ANCHORS -If this bit is set, then @samp{^} and @samp{$} are special anywhere outside -a list; if this bit isn't set, then these characters are special only in -certain contexts. @xref{Match-beginning-of-line Operator}, and -@ref{Match-end-of-line Operator}. - -@cnindex RE_CONTEXT_INDEP_OPS -@item RE_CONTEXT_INDEP_OPS -If this bit is set, then certain characters are special anywhere outside -a list; if this bit isn't set, then those characters are special only in -some contexts and are ordinary elsewhere. Specifically, if this bit -isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS} -isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending -on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators -only if they're not first in a regular expression or just after an -open-group or alternation operator.
The same holds for @samp{@{} (or -@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if -it is the beginning of a valid interval and the syntax bit -@code{RE_INTERVALS} is set. - -@cnindex RE_CONTEXT_INVALID_OPS -@item RE_CONTEXT_INVALID_OPS -If this bit is set, then repetition and alternation operators can't be -in certain positions within a regular expression. Specifically, the -regular expression is invalid if it has: - -@itemize @bullet - -@item -a repetition operator first in the regular expression or just after a -match-beginning-of-line, open-group, or alternation operator; or - -@item -an alternation operator first or last in the regular expression, just -before a match-end-of-line operator, or just after an alternation or -open-group operator. - -@end itemize - -If this bit isn't set, then you can put the characters representing the -repetition and alternation characters anywhere in a regular expression. -Whether or not they will in fact be operators in certain positions -depends on other syntax bits. - -@cnindex RE_DOT_NEWLINE -@item RE_DOT_NEWLINE -If this bit is set, then the match-any-character operator matches -a newline; if this bit isn't set, then it doesn't. - -@cnindex RE_DOT_NOT_NULL -@item RE_DOT_NOT_NULL -If this bit is set, then the match-any-character operator doesn't match -a null character; if this bit isn't set, then it does. - -@cnindex RE_INTERVALS -@item RE_INTERVALS -If this bit is set, then Regex recognizes interval operators; if this bit -isn't set, then it doesn't. - -@cnindex RE_LIMITED_OPS -@item RE_LIMITED_OPS -If this bit is set, then Regex doesn't recognize the match-one-or-more, -match-zero-or-one or alternation operators; if this bit isn't set, then -it does. - -@cnindex RE_NEWLINE_ALT -@item RE_NEWLINE_ALT -If this bit is set, then newline represents the alternation operator; if -this bit isn't set, then newline is ordinary. 
- -@cnindex RE_NO_BK_BRACES -@item RE_NO_BK_BRACES -If this bit is set, then @samp{@{} represents the open-interval operator -and @samp{@}} represents the close-interval operator; if this bit isn't -set, then @samp{\@{} represents the open-interval operator and -@samp{\@}} represents the close-interval operator. This bit is relevant -only if @code{RE_INTERVALS} is set. - -@cnindex RE_NO_BK_PARENS -@item RE_NO_BK_PARENS -If this bit is set, then @samp{(} represents the open-group operator and -@samp{)} represents the close-group operator; if this bit isn't set, then -@samp{\(} represents the open-group operator and @samp{\)} represents -the close-group operator. - -@cnindex RE_NO_BK_REFS -@item RE_NO_BK_REFS -If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as -the back reference operator; if this bit isn't set, then it does. - -@cnindex RE_NO_BK_VBAR -@item RE_NO_BK_VBAR -If this bit is set, then @samp{|} represents the alternation operator; -if this bit isn't set, then @samp{\|} represents the alternation -operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_NO_EMPTY_RANGES -@item RE_NO_EMPTY_RANGES -If this bit is set, then a regular expression with a range whose ending -point collates lower than its starting point is invalid; if this bit -isn't set, then Regex considers such a range to be empty. - -@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD -@item RE_UNMATCHED_RIGHT_PAREN_ORD -If this bit is set and the regular expression has no matching open-group -operator, then Regex considers what would otherwise be a close-group -operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}. - -@end table - - -@node Predefined Syntaxes, Collating Elements vs. 
Characters, Syntax Bits, Regular Expression Syntax -@section Predefined Syntaxes - -If you're programming with Regex, you can set a pattern buffer's -(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers}) -@code{syntax} field either to an arbitrary combination of syntax bits -(@pxref{Syntax Bits}) or else to the configurations defined by Regex. -These configurations define the syntaxes used by certain -programs---@sc{gnu} Emacs, -@cindex Emacs -@sc{posix} Awk, -@cindex POSIX Awk -traditional Awk, -@cindex Awk -Grep, -@cindex Grep -@cindex Egrep -Egrep---in addition to syntaxes for @sc{posix} basic and extended -regular expressions. - -The predefined syntaxes--taken directly from @file{regex.h}---are: - -@example -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? 
\+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -@end example - -@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax -@section Collating Elements vs.@: Characters - -@sc{posix} generalizes the notion of a character to that of a -collating element. It defines a @dfn{collating element} to be ``a -sequence of one or more bytes defined in the current collating sequence -as a unit of collation.'' - -This generalizes the notion of a character in -two ways. First, a single character can map into two or more collating -elements. For example, the German -@tex -`\ss' -@end tex -@ifinfo -``es-zet'' -@end ifinfo -collates as the collating element @samp{s} followed by another collating -element @samp{s}. Second, two or more characters can map into one -collating element. For example, the Spanish @samp{ll} collates after -@samp{l} and before @samp{m}. - -Since @sc{posix}'s ``collating element'' preserves the essential idea of -a ``character,'' we use the latter, more familiar, term in this document. - -@node The Backslash Character, , Collating Elements vs. 
Characters, Regular Expression Syntax -@section The Backslash Character - -@cindex \ -The @samp{\} character has one of four different meanings, depending on -the context in which you use it and what syntax bits are set -(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next -character, 3) introduce an operator, or 4) do nothing. - -@enumerate -@item -It stands for itself inside a list -(@pxref{List Operators}) if the syntax bit -@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]} -would match @samp{\}. - -@item -It quotes (makes ordinary, if it's special) the next character when you -use it either: - -@itemize @bullet -@item -outside a list,@footnote{Sometimes -you don't have to explicitly quote special characters to make -them ordinary. For instance, most characters lose any special meaning -inside a list (@pxref{List Operators}). In addition, if the syntax bits -@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS} -aren't set, then (for historical reasons) the matcher considers special -characters ordinary if they are in contexts where the operations they -represent make no sense; for example, then the match-zero-or-more -operator (represented by @samp{*}) matches itself in the regular -expression @samp{*foo} because there is no preceding expression on which -it can operate. It is poor practice, however, to depend on this -behavior; if you want a special character to be ordinary outside a list, -it's better to always quote it, regardless.} or - -@item -inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set. - -@end itemize - -@item -It introduces an operator when followed by certain ordinary -characters---sometimes only when certain syntax bits are set. See the -cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR}, -@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}. 
Also: - -@itemize @bullet -@item -@samp{\b} represents the match-word-boundary operator -(@pxref{Match-word-boundary Operator}). - -@item -@samp{\B} represents the match-within-word operator -(@pxref{Match-within-word Operator}). - -@item -@samp{\<} represents the match-beginning-of-word operator @* -(@pxref{Match-beginning-of-word Operator}). - -@item -@samp{\>} represents the match-end-of-word operator -(@pxref{Match-end-of-word Operator}). - -@item -@samp{\w} represents the match-word-constituent operator -(@pxref{Match-word-constituent Operator}). - -@item -@samp{\W} represents the match-non-word-constituent operator -(@pxref{Match-non-word-constituent Operator}). - -@item -@samp{\`} represents the match-beginning-of-buffer -operator and @samp{\'} represents the match-end-of-buffer operator -(@pxref{Buffer Operators}). - -@item -If Regex was compiled with the C preprocessor symbol @code{emacs} -defined, then @samp{\s@var{class}} represents the match-syntactic-class -operator and @samp{\S@var{class}} represents the -match-not-syntactic-class operator (@pxref{Syntactic Class Operators}). - -@end itemize - -@item -In all other cases, Regex ignores @samp{\}. For example, -@samp{\n} matches @samp{n}. - -@end enumerate - -@node Common Operators, GNU Operators, Regular Expression Syntax, Top -@chapter Common Operators - -You compose regular expressions from operators. In the following -sections, we describe the regular expression operators specified by -@sc{posix}; @sc{gnu} also uses these. Most operators have more than one -representation as characters. @xref{Regular Expression Syntax}, for -what characters represent what operators under what circumstances. - -For most operators that can be represented in two ways, one -representation is a single character and the other is that character -preceded by @samp{\}. For example, either @samp{(} or @samp{\(} -represents the open-group operator. 
Which one does depends on the -setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is -this so? Historical reasons dictate some of the varying -representations, while @sc{posix} dictates others. - -Finally, almost all characters lose any special meaning inside a list -(@pxref{List Operators}). - -@menu -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ -@end menu - -@node Match-self Operator, Match-any-character Operator, , Common Operators -@section The Match-self Operator (@var{ordinary character}) - -This operator matches the character itself. All ordinary characters -(@pxref{Regular Expression Syntax}) represent this operator. For -example, @samp{f} is always an ordinary character, so the regular -expression @samp{f} matches only the string @samp{f}. In -particular, it does @emph{not} match the string @samp{ff}. - -@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators -@section The Match-any-character Operator (@code{.}) - -@cindex @samp{.} - -This operator matches any single printing or nonprinting character -except it won't match a: - -@table @asis -@item newline -if the syntax bit @code{RE_DOT_NEWLINE} isn't set. - -@item null -if the syntax bit @code{RE_DOT_NOT_NULL} is set. - -@end table - -The @samp{.} (period) character represents this operator. For example, -@samp{a.b} matches any three-character string beginning with @samp{a} -and ending with @samp{b}. - -@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators -@section The Concatenation Operator - -This operator concatenates two regular expressions @var{a} and @var{b}. 
-No character represents this operator; you simply put @var{b} after -@var{a}. The result is a regular expression that will match a string if -@var{a} matches its first part and @var{b} matches the rest. For -example, @samp{xy} (two match-self operators) matches @samp{xy}. - -@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators -@section Repetition Operators - -Repetition operators repeat the preceding regular expression a specified -number of times. - -@menu -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: @{@} -@end menu - -@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators -@subsection The Match-zero-or-more Operator (@code{*}) - -@cindex @samp{*} - -This operator repeats the smallest possible preceding regular expression -as many times as necessary (including zero) to match the pattern. -@samp{*} represents this operator. For example, @samp{o*} -matches any string made up of zero or more @samp{o}s. Since this -operator operates on the smallest preceding regular expression, -@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So, -@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on. - -Since the match-zero-or-more operator is a suffix operator, it may be -useless as such when no regular expression precedes it. This is the -case when it: - -@itemize @bullet -@item -is first in a regular expression, or - -@item -follows a match-beginning-of-line, open-group, or alternation -operator. - -@end itemize - -@noindent -Three different things can happen in these cases: - -@enumerate -@item -If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the -regular expression is invalid. - -@item -If @code{RE_CONTEXT_INVALID_OPS} isn't set, but -@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the -match-zero-or-more operator (which then operates on the empty string). 
- -@item -Otherwise, @samp{*} is ordinary. - -@end enumerate - -@cindex backtracking -The matcher processes a match-zero-or-more operator by first matching as -many repetitions of the smallest preceding regular expression as it can. -Then it continues to match the rest of the pattern. - -If it can't match the rest of the pattern, it backtracks (as many times -as necessary), each time discarding one of the matches until it can -either match the entire pattern or be certain that it cannot get a -match. For example, when matching @samp{ca*ar} against @samp{caaar}, -the matcher first matches all three @samp{a}s of the string with the -@samp{a*} of the regular expression. However, it cannot then match the -final @samp{ar} of the regular expression against the final @samp{r} of -the string. So it backtracks, discarding the match of the last @samp{a} -in the string. It can then match the remaining @samp{ar}. - - -@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators -@subsection The Match-one-or-more Operator (@code{+} or @code{\+}) - -@cindex @samp{+} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize -this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't -set, then @samp{+} represents this operator; if it is, then @samp{\+} -does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression at least once; -@pxref{Match-zero-or-more Operator}, for what it operates on, how some -syntax bits affect it, and how Regex backtracks to match it. - -For example, supposing that @samp{+} represents the match-one-or-more -operator; then @samp{ca+r} matches, e.g., @samp{car} and -@samp{caaaar}, but not @samp{cr}. 
- -@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators -@subsection The Match-zero-or-one Operator (@code{?} or @code{\?}) -@cindex @samp{?} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator; -if it is, then @samp{\?} does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression once or not at all; -@pxref{Match-zero-or-more Operator}, to see what it operates on, how -some syntax bits affect it, and how Regex backtracks to match it. - -For example, supposing that @samp{?} represents the match-zero-or-one -operator; then @samp{ca?r} matches both @samp{car} and @samp{cr}, but -nothing else. - -@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators -@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}}) - -@cindex interval expression -@cindex @samp{@{} -@cindex @samp{@}} -@cindex @samp{\@{} -@cindex @samp{\@}} - -If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes -@dfn{interval expressions}. They repeat the smallest possible preceding -regular expression a specified number of times. - -If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents -the @dfn{open-interval operator} and @samp{@}} represents the -@dfn{close-interval operator} ; otherwise, @samp{\@{} and @samp{\@}} do. - -Specifically, supposing that @samp{@{} and @samp{@}} represent the -open-interval and close-interval operators; then: - -@table @code -@item @{@var{count}@} -matches exactly @var{count} occurrences of the preceding regular -expression. - -@item @{@var{min,}@} -matches @var{min} or more occurrences of the preceding regular -expression. 
- -@item @{@var{min, max}@} -matches at least @var{min} but no more than @var{max} occurrences of -the preceding regular expression. - -@end table - -The interval expression (but not necessarily the regular expression that -contains it) is invalid if: - -@itemize @bullet -@item -@var{min} is greater than @var{max}, or - -@item -any of @var{count}, @var{min}, or @var{max} are outside the range -zero to @code{RE_DUP_MAX} (which symbol @file{regex.h} -defines). - -@end itemize - -If the interval expression is invalid and the syntax bit -@code{RE_NO_BK_BRACES} is set, then Regex considers all the -characters in the would-be interval to be ordinary. If that bit -isn't set, then the regular expression is invalid. - -If the interval expression is valid but there is no preceding regular -expression on which to operate, then if the syntax bit -@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid. -If that bit isn't set, then Regex considers all the characters---other -than backslashes, which it ignores---in the would-be interval to be -ordinary. - - -@node Alternation Operator, List Operators, Repetition Operators, Common Operators -@section The Alternation Operator (@code{|} or @code{\|}) - -@kindex | -@kindex \| -@cindex alternation operator -@cindex or operator - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator; -otherwise, @samp{\|} does. - -Alternatives match one of a choice of regular expressions: -if you put the character(s) representing the alternation operator between -any two regular expressions @var{a} and @var{b}, the result matches -the union of the strings that @var{a} and @var{b} match. For -example, supposing that @samp{|} is the alternation operator, then -@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or -@samp{quux}. 
- -@ignore -@c Nobody needs to disallow empty alternatives any more. -If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular -expressions @var{a} or @var{b} is empty, the -regular expression is invalid. More precisely, if this syntax bit is -set, then the alternation operator can't: - -@itemize @bullet -@item -be first or last in a regular expression; - -@item -follow either another alternation operator or an open-group operator -(@pxref{Grouping Operators}); or - -@item -precede a close-group operator. - -@end itemize - -@noindent -For example, supposing @samp{(} and @samp{)} represent the open and -close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar}, -@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid. -@end ignore - -The alternation operator operates on the @emph{largest} possible -surrounding regular expressions. (Put another way, it has the lowest -precedence of any regular expression operator.) -Thus, the only way you can -delimit its arguments is to use grouping. For example, if @samp{(} and -@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar} -would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would -match @samp{foo} or @samp{bar}.) - -@cindex backtracking -The matcher usually tries all combinations of alternatives so as to -match the longest possible string. For example, when matching -@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot -take, say, the first (``depth-first'') combination it could match, since -then it would be content to match just @samp{fooqbar}. 
- -@comment xx something about leftmost-longest - - -@node List Operators, Grouping Operators, Alternation Operator, Common Operators -@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -@cindex matching list -@cindex @samp{[} -@cindex @samp{]} -@cindex @samp{^} -@cindex @samp{-} -@cindex @samp{\} -@cindex @samp{[^} -@cindex nonmatching list -@cindex matching newline -@cindex bracket expression - -@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or -more items. An @dfn{item} is a character, -@ignore -(These get added when they get implemented.) -a collating symbol, an equivalence class expression, -@end ignore -a character class expression, or a range expression. The syntax bits -affect which kinds of items you can put in a list. We explain the last -two items in subsections below. Empty lists are invalid. - -A @dfn{matching list} matches a single character represented by one of -the list items. You form a matching list by enclosing one or more items -within an @dfn{open-matching-list operator} (represented by @samp{[}) -and a @dfn{close-list operator} (represented by @samp{]}). - -For example, @samp{[ab]} matches either @samp{a} or @samp{b}. -@samp{[ad]*} matches the empty string and any string composed of just -@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular -expression with a @samp{[} but no matching -@samp{]}. - -@dfn{Nonmatching lists} are similar to matching lists except that they -match a single character @emph{not} represented by one of the list -items. You use an @dfn{open-nonmatching-list operator} (represented by -@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be -the first character in the list. If you put a @samp{^} character first -in (what you think is) a matching list, you'll turn it into a -nonmatching list.}) instead of an open-matching-list operator to start a -nonmatching list. 
- -For example, @samp{[^ab]} matches any character except @samp{a} or -@samp{b}. - -If the @code{posix_newline} field in the pattern buffer (@pxref{GNU -Pattern Buffers}) is set, then nonmatching lists do not match a newline. - -Most characters lose any special meaning inside a list. The special -characters inside a list follow. - -@table @samp -@item ] -ends the list if it's not the first list item. So, if you want to make -the @samp{]} character a list item, you must put it first. - -@item \ -quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is -set. - -@ignore -Put these in if they get implemented. - -@item [. -represents the open-collating-symbol operator (@pxref{Collating Symbol -Operators}). - -@item .] -represents the close-collating-symbol operator. - -@item [= -represents the open-equivalence-class operator (@pxref{Equivalence Class -Operators}). - -@item =] -represents the close-equivalence-class operator. - -@end ignore - -@item [: -represents the open-character-class operator (@pxref{Character Class -Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what -follows is a valid character class expression. - -@item :] -represents the close-character-class operator if the syntax bit -@code{RE_CHAR_CLASSES} is set and what precedes it is an -open-character-class operator followed by a valid character class name. - -@item - -represents the range operator (@pxref{Range Operator}) if it's -not first or last in a list or the ending point of a range. - -@end table - -@noindent -All other characters are ordinary. For example, @samp{[.*]} matches -@samp{.} and @samp{*}. - -@menu -* Character Class Operators:: [:class:] -* Range Operator:: start-end -@end menu - -@ignore -(If collating symbols and equivalence class expressions get implemented, -then add this.) 
- -node Collating Symbol Operators -subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]}) - -If the syntax bit @code{XX} is set, then you can represent -collating symbols inside lists. You form a @dfn{collating symbol} by -putting a collating element between an @dfn{open-collating-symbol -operator} and an @dfn{close-collating-symbol operator}. @samp{[.} -represents the open-collating-symbol operator and @samp{.]} represents -the close-collating-symbol operator. For example, if @samp{ll} is a -collating element, then @samp{[[.ll.]]} would match @samp{ll}. - -node Equivalence Class Operators -subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]}) -@cindex equivalence class expression in regex -@cindex @samp{[=} in regex -@cindex @samp{=]} in regex - -If the syntax bit @code{XX} is set, then Regex recognizes equivalence class -expressions inside lists. A @dfn{equivalence class expression} is a set -of collating elements which all belong to the same equivalence class. -You form an equivalence class expression by putting a collating -element between an @dfn{open-equivalence-class operator} and a -@dfn{close-equivalence-class operator}. @samp{[=} represents the -open-equivalence-class operator and @samp{=]} represents the -close-equivalence-class operator. For example, if @samp{a} and @samp{A} -were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]} -would match both @samp{a} and @samp{A}. If the collating element in an -equivalence class expression isn't part of an equivalence class, then -the matcher considers the equivalence class expression to be a collating -symbol. 
- -@end ignore - -@node Character Class Operators, Range Operator, , List Operators -@subsection Character Class Operators (@code{[:} @dots{} @code{:]}) - -@cindex character classes -@cindex @samp{[:} in regex -@cindex @samp{:]} in regex - -If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex -recognizes character class expressions inside lists. A @dfn{character -class expression} matches one character from a given class. You form a -character class expression by putting a character class name between an -@dfn{open-character-class operator} (represented by @samp{[:}) and a -@dfn{close-character-class operator} (represented by @samp{:]}). The -character class names and their meanings are: - -@table @code - -@item alnum -letters and digits - -@item alpha -letters - -@item blank -system-dependent; for @sc{gnu}, a space or tab - -@item cntrl -control characters (in the @sc{ascii} encoding, code 0177 and codes -less than 040) - -@item digit -digits - -@item graph -same as @code{print} except omits space - -@item lower -lowercase letters - -@item print -printable characters (in the @sc{ascii} encoding, space -tilde---codes 040 through 0176) - -@item punct -neither control nor alphanumeric characters - -@item space -space, horizontal tab, carriage return, newline, vertical tab, and form feed - -@item upper -uppercase letters - -@item xdigit -hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F} - -@end table - -@noindent -These correspond to the definitions in the C library's @file{<ctype.h>} -facility. For example, @samp{[:alpha:]} corresponds to the standard -facility @code{isalpha}. Regex recognizes character class expressions -only inside of lists; so @samp{[[:alpha:]]} matches any letter, but -@samp{[:alpha:]} outside of a bracket expression and not followed by a -repetition operator matches just itself. 
- -@node Range Operator, , Character Class Operators, List Operators -@subsection The Range Operator (@code{-}) - -Regex recognizes @dfn{range expressions} inside a list. They represent -those characters -that fall between two elements in the current collating sequence. You -form a range expression by putting a @dfn{range operator} between two -@ignore -(If these get implemented, then substitute this for ``characters.'') -of any of the following: characters, collating elements, collating symbols, -and equivalence class expressions. The starting point of the range and -the ending point of the range don't have to be the same kind of item, -e.g., the starting point could be a collating element and the ending -point could be an equivalence class expression. If a range's ending -point is an equivalence class, then all the collating elements in that -class will be in the range. -@end ignore -characters.@footnote{You can't use a character class for the starting -or ending point of a range, since a character class is not a single -character.} @samp{-} represents the range operator. For example, -@samp{a-f} within a list represents all the characters from @samp{a} -through @samp{f} -inclusively. - -If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's -ending point collates less than its starting point, the range (and the -regular expression containing it) is invalid. For example, the regular -expression @samp{[z-a]} would be invalid. If this bit isn't set, then -Regex considers such a range to be empty. - -Since @samp{-} represents the range operator, if you want to make a -@samp{-} character itself -a list item, you must do one of the following: - -@itemize @bullet -@item -Put the @samp{-} either first or last in the list. - -@item -Include a range whose starting point collates strictly lower than -@samp{-} and whose ending point collates equal or higher. 
Unless a -range is the first item in a list, a @samp{-} can't be its starting -point, but @emph{can} be its ending point. That is because Regex -considers @samp{-} to be the range operator unless it is preceded by -another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)}, -@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are -contiguous characters in the collating sequence. You might think that -@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it -has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so -it matches, e.g., @samp{,}, not @samp{.}. - -@item -Put a range whose starting point is @samp{-} first in the list. - -@end itemize - -For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in -English, in @sc{ascii}). - - -@node Grouping Operators, Back-reference Operator, List Operators, Common Operators -@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)}) - -@kindex ( -@kindex ) -@kindex \( -@kindex \) -@cindex grouping -@cindex subexpressions -@cindex parenthesizing - -A @dfn{group}, also known as a @dfn{subexpression}, consists of an -@dfn{open-group operator}, any number of other operators, and a -@dfn{close-group operator}. Regex treats this sequence as a unit, just -as mathematics and programming languages treat a parenthesized -expression as a unit. - -Therefore, using @dfn{groups}, you can: - -@itemize @bullet -@item -delimit the argument(s) to an alternation operator (@pxref{Alternation -Operator}) or a repetition operator (@pxref{Repetition -Operators}). - -@item -keep track of the indices of the substring that matched a given group. -@xref{Using Registers}, for a precise explanation. -This lets you: - -@itemize @bullet -@item -use the back-reference operator (@pxref{Back-reference Operator}). - -@item -use registers (@pxref{Using Registers}). 
- -@end itemize - -@end itemize - -If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents -the open-group operator and @samp{)} represents the -close-group operator; otherwise, @samp{\(} and @samp{\)} do. - -If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a -close-group operator has no matching open-group operator, then Regex -considers it to match @samp{)}. - - -@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators -@section The Back-reference Operator (@dfn{\}@var{digit}) - -@cindex back references - -If the syntax bit @code{RE_NO_BK_REF} isn't set, then Regex recognizes -back references. A back reference matches a specified preceding group. -The back reference operator is represented by @samp{\@var{digit}} -anywhere after the end of a regular expression's @w{@var{digit}-th} -group (@pxref{Grouping Operators}). - -@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns -numbers 1 through 9 to the first nine groups it encounters. By using -one of @samp{\1} through @samp{\9} after the corresponding group's -close-group operator, you can match a substring identical to the -one that the group does. - -Back references match according to the following (in all examples below, -@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{} -the open-interval and @samp{@}} the close-interval operator): - -@itemize @bullet -@item -If the group matches a substring, the back reference matches an -identical substring. For example, @samp{(a)\1} matches @samp{aa} and -@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise, -@samp{(.*)\1} matches any (newline-free if the syntax bit -@code{RE_DOT_NEWLINE} isn't set) string that is composed of two -identical halves; the @samp{(.*)} matches the first half and the -@samp{\1} matches the second half. 
- -@item -If the group matches more than once (as it might if followed -by, e.g., a repetition operator), then the back reference matches the -substring the group @emph{last} matched. For example, -@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the -outer one) matches @samp{aab} and @w{group 2} (the inner one) matches -@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches -@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches -@samp{a}. - -@item -If the group doesn't participate in a match, i.e., it is part of an -alternative not taken or a repetition operator allows zero repetitions -of it, then the back reference makes the whole match fail. For example, -@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three} -and @samp{two-and-four}, but not @samp{one-and-four} or -@samp{two-and-three}. For example, if the pattern matches -@samp{one-and-}, then its @w{group 2} matches the empty string and its -@w{group 3} doesn't participate in the match. So, if it then matches -@samp{four}, then when it tries to back reference @w{group 3}---which it -will attempt to do because @samp{\3} follows the @samp{four}---the match -will fail because @w{group 3} didn't participate in the match. - -@end itemize - -You can use a back reference as an argument to a repetition operator. For -example, @samp{(a(b))\2*} matches @samp{a} followed by one or more -@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}. - -If there is no preceding @w{@var{digit}-th} subexpression, the regular -expression is invalid. - - -@node Anchoring Operators, , Back-reference Operator, Common Operators -@section Anchoring Operators - -@cindex anchoring -@cindex regexp anchoring - -These operators can constrain a pattern to match only at the beginning or -end of the entire string or at the beginning or end of a line. 
- -@menu -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ -@end menu - - -@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators -@subsection The Match-beginning-of-line Operator (@code{^}) - -@kindex ^ -@cindex beginning-of-line operator -@cindex anchors - -This operator can match the empty string either at the beginning of the -string or after a newline character. Thus, it is said to @dfn{anchor} -the pattern to the beginning of a line. - -In the cases following, @samp{^} represents this operator. (Otherwise, -@samp{^} is ordinary.) - -@itemize @bullet - -@item -It (the @samp{^}) is first in the pattern, as in @samp{^foo}. - -@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})} -@item -The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside -a bracket expression. - -@cindex open-group operator and @samp{^} -@cindex alternation operator and @samp{^} -@item -It follows an open-group or alternation operator, as in @samp{a\(^b\)} -and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation -Operator}. - -@end itemize - -These rules imply that some valid patterns containing @samp{^} cannot be -matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS} -is set. - -@vindex not_bol @r{field in pattern buffer} -If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU -Pattern Buffers}), then @samp{^} fails to match at the beginning of the -string. @xref{POSIX Matching}, for when you might find this useful. - -@vindex newline_anchor @r{field in pattern buffer} -If the @code{newline_anchor} field isn't set in the pattern buffer, then -@samp{^} fails to match after a newline. This is useful when you do not -regard the string to be matched as broken into lines. 
- - -@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators -@subsection The Match-end-of-line Operator (@code{$}) - -@kindex $ -@cindex end-of-line operator -@cindex anchors - -This operator can match the empty string either at the end of -the string or before a newline character in the string. Thus, it is -said to @dfn{anchor} the pattern to the end of a line. - -It is always represented by @samp{$}. For example, @samp{foo$} usually -matches, e.g., @samp{foo} and, e.g., the first three characters of -@samp{foo\nbar}. - -Its interaction with the syntax bits and pattern buffer fields is -exactly the dual of @samp{^}'s; see the previous section. (That is, -``beginning'' becomes ``end'', ``next'' becomes ``previous'', and -``after'' becomes ``before''.) - - -@node GNU Operators, GNU Emacs Operators, Common Operators, Top -@chapter GNU Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't). - -@menu -* Word Operators:: -* Buffer Operators:: -@end menu - -@node Word Operators, Buffer Operators, , GNU Operators -@section Word Operators - -The operators in this section require Regex to recognize parts of words. -Regex uses a syntax table to determine whether or not a character is -part of a word, i.e., whether or not it is @dfn{word-constituent}. - -@menu -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W -@end menu - -@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators -@subsection Non-Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. Regex always uses a @code{char *} variable -@code{re_syntax_table} as its syntax table. 
In some cases, it -initializes this variable and in others it expects you to initialize it. - -@itemize @bullet -@item -If Regex is compiled with the preprocessor symbols @code{emacs} and -@code{SYNTAX_TABLE} both undefined, then Regex allocates -@code{re_syntax_table} and initializes an element @var{i} either to -@code{Sword} (which it defines) if @var{i} is a letter, number, or -@samp{_}, or to zero if it's not. - -@item -If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE} -defined, then Regex expects you to define a @code{char *} variable -@code{re_syntax_table} to be a valid syntax table. - -@item -@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with -the preprocessor symbol @code{emacs} defined. - -@end itemize - -@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators -@subsection The Match-word-boundary Operator (@code{\b}) - -@cindex @samp{\b} -@cindex word boundaries, matching - -This operator (represented by @samp{\b}) matches the empty string at -either the beginning or the end of a word. For example, @samp{\brat\b} -matches the separate word @samp{rat}. - -@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators -@subsection The Match-within-word Operator (@code{\B}) - -@cindex @samp{\B} - -This operator (represented by @samp{\B}) matches the empty string within -a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but -@samp{dirty \Brat} doesn't match @samp{dirty rat}. - -@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators -@subsection The Match-beginning-of-word Operator (@code{\<}) - -@cindex @samp{\<} - -This operator (represented by @samp{\<}) matches the empty string at the -beginning of a word. 
- -@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators -@subsection The Match-end-of-word Operator (@code{\>}) - -@cindex @samp{\>} - -This operator (represented by @samp{\>}) matches the empty string at the -end of a word. - -@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators -@subsection The Match-word-constituent Operator (@code{\w}) - -@cindex @samp{\w} - -This operator (represented by @samp{\w}) matches any word-constituent -character. - -@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators -@subsection The Match-non-word-constituent Operator (@code{\W}) - -@cindex @samp{\W} - -This operator (represented by @samp{\W}) matches any character that is -not word-constituent. - - -@node Buffer Operators, , Word Operators, GNU Operators -@section Buffer Operators - -Following are operators which work on buffers. In Emacs, a @dfn{buffer} -is, naturally, an Emacs buffer. For other programs, Regex considers the -entire string to be matched as the buffer. - -@menu -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' -@end menu - - -@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators -@subsection The Match-beginning-of-buffer Operator (@code{\`}) - -@cindex @samp{\`} - -This operator (represented by @samp{\`}) matches the empty string at the -beginning of the buffer. - -@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators -@subsection The Match-end-of-buffer Operator (@code{\'}) - -@cindex @samp{\'} - -This operator (represented by @samp{\'}) matches the empty string at the -end of the buffer. 
- - -@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top -@chapter GNU Emacs Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't) -that you can use only when Regex is compiled with the preprocessor -symbol @code{emacs} defined. - -@menu -* Syntactic Class Operators:: -@end menu - - -@node Syntactic Class Operators, , , GNU Emacs Operators -@section Syntactic Class Operators - -The operators in this section require Regex to recognize the syntactic -classes of characters. Regex uses a syntax table to determine this. - -@menu -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS -@end menu - -@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators -@subsection Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. - -If Regex is compiled with the preprocessor symbol @code{emacs} defined, -then Regex expects you to define and initialize the variable -@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax -tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax -Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual}, -for a description of Emacs' syntax tables. - -@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators -@subsection The Match-syntactic-class Operator (@code{\s}@var{class}) - -@cindex @samp{\s} - -This operator matches any character whose syntactic class is represented -by a specified character. @samp{\s@var{class}} represents this operator -where @var{class} is the character representing the syntactic class you -want. For example, @samp{w} represents the syntactic -class of word-constituent characters, so @samp{\sw} matches any -word-constituent character. 
- -@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators -@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class}) - -@cindex @samp{\S} - -This operator is similar to the match-syntactic-class operator except -that it matches any character whose syntactic class is @emph{not} -represented by the specified character. @samp{\S@var{class}} represents -this operator. For example, @samp{w} represents the syntactic class of -word-constituent characters, so @samp{\Sw} matches any character that is -not word-constituent. - - -@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top -@chapter What Gets Matched? - -Regex usually matches strings according to the ``leftmost longest'' -rule; that is, it chooses the longest of the leftmost matches. This -does not mean that, for a regular expression containing subexpressions, -it simply chooses the longest match for each subexpression, left to -right; the overall match must also be the longest possible one. - -For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not -@samp{acdac}, as it would if it were to choose the longest match for the -first subexpression. - - -@node Programming with Regex, Copying, What Gets Matched?, Top -@chapter Programming with Regex - -Here we describe how you use the Regex data structures and functions in -C programs. Regex has three interfaces: one designed for @sc{gnu}, one -compatible with @sc{posix} and one compatible with Berkeley @sc{unix}. - -@menu -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: -@end menu - - -@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex -@section GNU Regex Functions - -If you're writing code that doesn't need to be compatible with either -@sc{posix} or Berkeley @sc{unix}, you can use these functions. They -provide more options than the other interfaces. - -@menu -* GNU Pattern Buffers:: The re_pattern_buffer type. 
-* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () -@end menu - - -@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions -@subsection GNU Pattern Buffers - -@cindex pattern buffer, definition of -@tindex re_pattern_buffer @r{definition} -@tindex struct re_pattern_buffer @r{definition} - -To compile, match, or search for a given regular expression, you must -supply a pattern buffer. A @dfn{pattern buffer} holds one compiled -regular expression.@footnote{Regular expressions are also referred to as -``patterns,'' hence the name ``pattern buffer.''} - -You can have several different pattern buffers simultaneously, each -holding a compiled pattern for a different regular expression. - -@file{regex.h} defines the pattern buffer @code{struct} as follows: - -@example - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. 
*/ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -@end example - - -@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions -@subsection GNU Regular Expression Compiling - -In @sc{gnu}, you can both match and search for a given regular -expression. To do either, you must first compile it in a pattern buffer -(@pxref{GNU Pattern Buffers}). - -@cindex syntax initialization -@vindex re_syntax_options @r{initialization} -Regular expressions match according to the syntax with which they were -compiled; with @sc{gnu}, you indicate what syntax you want by setting -the variable @code{re_syntax_options} (declared in @file{regex.h} and -defined in @file{regex.c}) before calling the compiling function, -@code{re_compile_pattern} (see below). 
@xref{Syntax Bits}, and -@ref{Predefined Syntaxes}. - -You can change the value of @code{re_syntax_options} at any time. -Usually, however, you set its value once and then never change it. - -@cindex pattern buffer initialization -@code{re_compile_pattern} takes a pattern buffer as an argument. You -must initialize the following fields: - -@table @code - -@item translate @r{initialization} - -@item translate -@vindex translate @r{initialization} -Initialize this to point to a translate table if you want one, or to -zero if you don't. We explain translate tables in @ref{GNU Translate -Tables}. - -@item fastmap -@vindex fastmap @r{initialization} -Initialize this to nonzero if you want a fastmap, or to zero if you -don't. - -@item buffer -@itemx allocated -@vindex buffer @r{initialization} -@vindex allocated @r{initialization} -@findex malloc -If you want @code{re_compile_pattern} to allocate memory for the -compiled pattern, set both of these to zero. If you have an existing -block of memory (allocated with @code{malloc}) you want Regex to use, -set @code{buffer} to its address and @code{allocated} to its size (in -bytes). - -@code{re_compile_pattern} uses @code{realloc} to extend the space for -the compiled pattern as necessary. - -@end table - -To compile a pattern buffer, use: - -@findex re_compile_pattern -@example -char * -re_compile_pattern (const char *@var{regex}, const int @var{regex_size}, - struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{regex} is the regular expression's address, @var{regex_size} is its -length, and @var{pattern_buffer} is the pattern buffer's address. - -If @code{re_compile_pattern} successfully compiles the regular -expression, it returns zero and sets @code{*@var{pattern_buffer}} to the -compiled pattern. It sets the pattern buffer's fields as follows: - -@table @code -@item buffer -@vindex buffer @r{field, set by @code{re_compile_pattern}} -to the compiled pattern. 
- -@item used -@vindex used @r{field, set by @code{re_compile_pattern}} -to the number of bytes the compiled pattern in @code{buffer} occupies. - -@item syntax -@vindex syntax @r{field, set by @code{re_compile_pattern}} -to the current value of @code{re_syntax_options}. - -@item re_nsub -@vindex re_nsub @r{field, set by @code{re_compile_pattern}} -to the number of subexpressions in @var{regex}. - -@item fastmap_accurate -@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}} -to zero on the theory that the pattern you're compiling is different -than the one previously compiled into @code{buffer}; in that case (since -you can't make a fastmap without a compiled pattern), -@code{fastmap} would either contain an incompatible fastmap, or nothing -at all. - -@c xx what else? -@end table - -If @code{re_compile_pattern} can't compile @var{regex}, it returns an -error string corresponding to one of the errors listed in @ref{POSIX -Regular Expression Compiling}. - - -@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions -@subsection GNU Matching - -@cindex matching with GNU functions - -Matching the @sc{gnu} way means trying to match as much of a string as -possible starting at a position within it you specify. Once you've compiled -a pattern into a pattern buffer (@pxref{GNU Regular Expression -Compiling}), you can ask the matcher to match that pattern against a -string using: - -@findex re_match -@example -int -re_match (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, struct re_registers *@var{regs}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer containing a -compiled pattern. @var{string} is the string you want to match; it can -contain newline and null characters. @var{size} is the length of that -string. 
@var{start} is the string index at which you want to -begin matching; the first character of @var{string} is at index zero. -@xref{Using Registers}, for an explanation of @var{regs}; you can safely -pass zero. - -@code{re_match} matches the regular expression in @var{pattern_buffer} -against the string @var{string} according to the syntax in -@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular -Expression Compiling}, for how to set it.) The function returns -@math{-1} if the compiled pattern does not match any part of -@var{string} and @math{-2} if an internal error happens; otherwise, it -returns how many (possibly zero) characters of @var{string} the pattern -matched. - -An example: suppose @var{pattern_buffer} points to a pattern buffer -containing the compiled pattern for @samp{a*}, and @var{string} points -to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start} -is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the -last three @samp{a}s in @var{string}. If @var{start} is 0, -@code{re_match} returns 5, i.e., @samp{a*} would have matched all the -@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns -zero. - -If @var{start} is not between zero and @var{size}, then -@code{re_match} returns @math{-1}. - - -@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions -@subsection GNU Searching - -@cindex searching with GNU functions - -@dfn{Searching} means trying to match starting at successive positions -within a string. The function @code{re_search} does this. - -Before calling @code{re_search}, you must compile your regular -expression. @xref{GNU Regular Expression Compiling}. 
- -Here is the function declaration: - -@findex re_search -@example -int -re_search (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}) -@end example - -@noindent -@vindex start @r{argument to @code{re_search}} -@vindex range @r{argument to @code{re_search}} -whose arguments are the same as those to @code{re_match} (@pxref{GNU -Matching}) except that the two arguments @var{start} and @var{range} -replace @code{re_match}'s argument @var{start}. - -If @var{range} is positive, then @code{re_search} attempts a match -starting first at index @var{start}, then at @math{@var{start} + 1} if -that fails, and so on, up to @math{@var{start} + @var{range}}; if -@var{range} is negative, then it attempts a match starting first at -index @var{start}, then at @math{@var{start} -1} if that fails, and so -on. - -If @var{start} is not between zero and @var{size}, then @code{re_search} -returns @math{-1}. When @var{range} is positive, @code{re_search} -adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is -between zero and @var{size}, if necessary; that way it won't search -outside of @var{string}. Similarly, when @var{range} is negative, -@code{re_search} adjusts @var{range} so that @math{@var{start} + -@var{range} + 1} is between zero and @var{size}, if necessary. - -If the @code{fastmap} field of @var{pattern_buffer} is zero, -@code{re_search} matches starting at consecutive positions; otherwise, -it uses @code{fastmap} to make the search more efficient. -@xref{Searching with Fastmaps}. - -If no match is found, @code{re_search} returns @math{-1}. If -a match is found, it returns the index where the match began. If an -internal error happens, it returns @math{-2}. 
- - -@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions -@subsection Matching and Searching with Split Data - -Using the functions @code{re_match_2} and @code{re_search_2}, you can -match or search in data that is divided into two strings. - -The function: - -@findex re_match_2 -@example -int -re_match_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similar to @code{re_match} (@pxref{GNU Matching}) except that you -pass @emph{two} data strings and sizes, and an index @var{stop} beyond -which you don't want the matcher to try matching. As with -@code{re_match}, if it succeeds, @code{re_match_2} returns how many -characters of @var{string} it matched. Regard @var{string1} and -@var{string2} as concatenated when you set the arguments @var{start} and -@var{stop} and use the contents of @var{regs}; @code{re_match_2} never -returns a value larger than @math{@var{size1} + @var{size2}}. - -The function: - -@findex re_search_2 -@example -int -re_search_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similarly related to @code{re_search}. - - -@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions -@subsection Searching with Fastmaps - -@cindex fastmaps -If you're searching through a long string, you should use a fastmap. -Without one, the searcher tries to match at consecutive positions in the -string. Generally, most of the characters in the string could not start -a match. 
It takes much longer to try matching at a given position in the -string than it does to check in a table whether or not the character at -that position could start a match. A @dfn{fastmap} is such a table. - -More specifically, a fastmap is an array indexed by the characters in -your character set. Under the @sc{ascii} encoding, therefore, a fastmap -has 256 elements. If you want the searcher to use a fastmap with a -given pattern buffer, you must allocate the array and assign the array's -address to the pattern buffer's @code{fastmap} field. You either can -compile the fastmap yourself or have @code{re_search} do it for you; -when @code{fastmap} is nonzero, it automatically compiles a fastmap the -first time you search using a particular compiled pattern. - -To compile a fastmap yourself, use: - -@findex re_compile_fastmap -@example -int -re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer. If the -character @var{c} could start a match for the pattern, -@code{re_compile_fastmap} makes -@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns -@math{0} if it can compile a fastmap and @math{-2} if there is an -internal error. For example, if @samp{|} is the alternation operator -and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then -@code{re_compile_fastmap} sets @code{fastmap['a']} and -@code{fastmap['b']} (and no others). - -@code{re_search} uses a fastmap as it moves along in the string: it -checks the string's characters until it finds one that's in the fastmap. -Then it tries matching at that character. If the match fails, it -repeats the process. So, by using a fastmap, @code{re_search} doesn't -waste time trying to match at positions in the string that couldn't -start a match. - -If you don't want @code{re_search} to use a fastmap, -store zero in the @code{fastmap} field of the pattern buffer before -calling @code{re_search}. 
- -Once you've initialized a pattern buffer's @code{fastmap} field, you -need never do so again---even if you compile a new pattern in -it---provided the way the field is set still reflects whether or not you -want a fastmap. @code{re_search} will still either do nothing if -@code{fastmap} is null or, if it isn't, compile a new fastmap for the -new pattern. - -@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions -@subsection GNU Translate Tables - -If you set the @code{translate} field of a pattern buffer to a translate -table, then the @sc{gnu} Regex functions to which you've passed that -pattern buffer use it to apply a simple transformation -to all the regular expression and string characters at which they look. - -A @dfn{translate table} is an array indexed by the characters in your -character set. Under the @sc{ascii} encoding, therefore, a translate -table has 256 elements. The array's elements are also characters in -your character set. When the Regex functions see a character @var{c}, -they use @code{translate[@var{c}]} in its place, with one exception: the -character after a @samp{\} is not translated. (This ensures that the -operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.) - -For example, a table that maps all lowercase letters to the -corresponding uppercase ones would cause the matcher to ignore -differences in case.@footnote{A table that maps all uppercase letters to -the corresponding lowercase ones would work just as well for this -purpose.} Such a table would map all characters except lowercase letters -to themselves, and lowercase letters to the corresponding uppercase -ones. 
Under the @sc{ascii} encoding, here's how you could initialize -such a table (we'll call it @code{case_fold}): - -@example -for (i = 0; i < 256; i++) - case_fold[i] = i; -for (i = 'a'; i <= 'z'; i++) - case_fold[i] = i - ('a' - 'A'); -@end example - -You tell Regex to use a translate table on a given pattern buffer by -assigning that table's address to the @code{translate} field of that -buffer. If you don't want Regex to do any translation, put zero into -this field. You'll get weird results if you change the table's contents -anytime between compiling the pattern buffer, compiling its fastmap, and -matching or searching with the pattern buffer. - -@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions -@subsection Using Registers - -A group in a regular expression can match a (possibly empty) substring -of the string that the regular expression as a whole matched. The matcher -remembers the beginning and end of the substring matched by -each group. - -To find out what they matched, pass a nonzero @var{regs} argument to a -@sc{gnu} matching or searching function (@pxref{GNU Matching} and -@ref{GNU Searching}), i.e., the address of a structure of this type, as -defined in @file{regex.h}: - -@c We don't bother to include this directly from regex.h, -@c since it changes so rarely. -@example -@tindex re_registers -@vindex num_regs @r{in @code{struct re_registers}} -@vindex start @r{in @code{struct re_registers}} -@vindex end @r{in @code{struct re_registers}} -struct re_registers -@{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -@}; -@end example - -Except for (possibly) the @var{num_regs}'th element (see below), the -@var{i}th element of the @code{start} and @code{end} arrays records -information about the @var{i}th group in the pattern. (They're declared -as C pointers, but this is only because not all C compilers accept -zero-length arrays; conceptually, it is simplest to think of them as -arrays.) 
- -The @code{start} and @code{end} arrays are allocated in various ways, -depending on the value of the @code{regs_allocated} -@vindex regs_allocated -field in the pattern buffer passed to the matcher. - -The simplest and perhaps most useful is to let the matcher (re)allocate -enough space to record information for all the groups in the regular -expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED}, -@vindex REGS_UNALLOCATED -the matcher allocates @math{1 + @var{re_nsub}} (another field in the -pattern buffer; @pxref{GNU Pattern Buffers}). It sets the extra element -to @math{-1}, and sets @code{regs_allocated} to @code{REGS_REALLOCATE}. -@vindex REGS_REALLOCATE -Then on subsequent calls with the same pattern buffer and @var{regs} -arguments, the matcher reallocates more space if necessary. - -It would perhaps be more logical to make the @code{regs_allocated} field -part of the @code{re_registers} structure, instead of part of the -pattern buffer. But in that case the caller would be forced to -initialize the structure before passing it. Much existing code doesn't -do this initialization, and it's arguably better to avoid it anyway. - -@code{re_compile_pattern} sets @code{regs_allocated} to -@code{REGS_UNALLOCATED}, -so if you use the GNU regular expression -functions, you get this behavior by default. - -xx document re_set_registers - -@sc{posix}, on the other hand, requires a different interface: the -caller is supposed to pass in a fixed-length array which the matcher -fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED} -@vindex REGS_FIXED -the matcher simply fills that array. - -The following examples illustrate the information recorded in the -@code{re_registers} structure. (In all of them, @samp{(} represents the -open-group and @samp{)} the close-group operator. The first character -in the string @var{string} is at index 0.) - -@c xx i'm not sure this is all true anymore. 
- -@itemize @bullet - -@item -If the regular expression has an @w{@var{i}-th} -group not contained within another group that matches a -substring of @var{string}, then the function sets -@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where -the substring matched by the @w{@var{i}-th} group begins, and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -substring's end. The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match @samp{((a)(b))} against @samp{ab}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} - -@item -1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]} -@end itemize - -@item -If a group matches more than once (as it might if followed by, -e.g., a repetition operator), then the function reports the information -about what the group @emph{last} matched. - -For example, when you match the pattern @samp{(a)*} against the string -@samp{aa}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group does not participate in a -successful match, e.g., it is an alternative not taken or a -repetition operator allows zero repetitions of it, then the function -sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}. 
- -For example, when you match the pattern @samp{(a)*b} against -the string @samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group matches a zero-length string, then the -function sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -zero-length string. - -For example, when you match the pattern @samp{(a*)b} against the string -@samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@ignore -The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match the pattern @samp{(a*)} against the empty -string, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize -@end ignore - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} and -the function reports a match of the @w{@var{i}-th} group, then it -records in @code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of -the @w{@var{j}-th} group. 
- -For example, when you match the pattern @samp{((a*)b)*} against the -string @samp{abb}, @w{group 2} last matches the empty string, so you -get what it previously matched: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]} -@end itemize - -When you match the pattern @samp{((a)*b)*} against the string -@samp{abb}, @w{group 2} doesn't participate in the last match, so you -get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} -@end itemize - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} -and the function sets -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets -@code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}. 
- -For example, when you match the pattern @samp{((a)*b)*c} against the -string @samp{c}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]} -@end itemize - -@end itemize - -@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions -@subsection Freeing GNU Pattern Buffers - -To free any allocated fields of a pattern buffer, you can use the -@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers}, -since the type @code{regex_t}---the type for @sc{posix} pattern -buffers---is equivalent to the type @code{re_pattern_buffer}. After -freeing a pattern buffer, you need to again compile a regular expression -in it (@pxref{GNU Regular Expression Compiling}) before passing it to -a matching or searching function. - - -@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex -@section POSIX Regex Functions - -If you're writing code that has to be @sc{posix} compatible, you'll need -to use these functions. Their interfaces are as specified by @sc{posix}, -draft 1003.2/D11.2. - -@menu -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () -@end menu - - -@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions -@subsection POSIX Pattern Buffers - -To compile or match a given regular expression the @sc{posix} way, you -must supply a pattern buffer exactly the way you do for @sc{gnu} -(@pxref{GNU Pattern Buffers}). 
@sc{posix} pattern buffers have type -@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer -type @code{re_pattern_buffer}. - - -@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions -@subsection POSIX Regular Expression Compiling - -With @sc{posix}, you can only search for a given regular expression; you -can't match it. To do this, you must first compile it in a -pattern buffer, using @code{regcomp}. - -@ignore -Before calling @code{regcomp}, you must initialize this pattern buffer -as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See -below, however, for how to choose a syntax with which to compile. -@end ignore - -To compile a pattern buffer, use: - -@findex regcomp -@example -int -regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags}) -@end example - -@noindent -@var{preg} is the initialized pattern buffer's address, @var{regex} is -the regular expression's address, and @var{cflags} is the compilation -flags, which Regex considers as a collection of bits. Here are the -valid bits, as defined in @file{regex.h}: - -@table @code - -@item REG_EXTENDED -@vindex REG_EXTENDED -says to use @sc{posix} Extended Regular Expression syntax; if this isn't -set, then says to use @sc{posix} Basic Regular Expression syntax. -@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly. - -@item REG_ICASE -@vindex REG_ICASE -@cindex ignoring case -says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate} -field to a translate table which ignores case, replacing anything you've -put there before. - -@item REG_NOSUB -@vindex REG_NOSUB -says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching}, -for what this means. - -@item REG_NEWLINE -@vindex REG_NEWLINE -says that a: - -@itemize @bullet - -@item -match-any-character operator (@pxref{Match-any-character -Operator}) doesn't match a newline. 
- -@item -nonmatching list not containing a newline (@pxref{List -Operators}) matches a newline. - -@item -match-beginning-of-line operator (@pxref{Match-beginning-of-line -Operator}) matches the empty string immediately after a newline, -regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for -an explanation of @code{REG_NOTBOL}). - -@item -match-end-of-line operator (@pxref{Match-end-of-line -Operator}) matches the empty string immediately before a newline, -regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching}, -for an explanation of @code{REG_NOTEOL}). - -@end itemize - -@end table - -If @code{regcomp} successfully compiles the regular expression, it -returns zero and sets @code{*@var{preg}} to the compiled -pattern. Except for @code{syntax} (which it sets as explained above), it -also sets the same fields the same way as does the @sc{gnu} compiling -function (@pxref{GNU Regular Expression Compiling}). - -If @code{regcomp} can't compile the regular expression, it returns one -of the error codes listed here. (Except when noted differently, the -syntax in all examples below is basic regular expression syntax.) - -@table @code - -@comment repetitions -@item REG_BADRPT -For example, the consecutive repetition operators @samp{**} in -@samp{a**} are invalid. As another example, if the syntax is extended -regular expression syntax, then the repetition operator @samp{*} with -nothing on which to operate in @samp{*} is invalid. - -@item REG_BADBR -For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid. - -@item REG_EBRACE -For example, @samp{a\@{1} is missing a close-interval operator. - -@comment lists -@item REG_EBRACK -For example, @samp{[a} is missing a close-list operator. - -@item REG_ERANGE -For example, the range ending point @samp{z} that collates lower than -does its starting point @samp{a} in @samp{[z-a]} is invalid. 
Also, the -range with the character class @samp{[:alpha:]} as its starting point in -@samp{[[:alpha:]-|]} is invalid. - -@item REG_ECTYPE -For example, the character class name @samp{foo} in @samp{[[:foo:]} is -invalid. - -@comment groups -@item REG_EPAREN -For example, @samp{a\)} is missing an open-group operator and @samp{\(a} -is missing a close-group operator. - -@item REG_ESUBREG -For example, the back reference @samp{\2} that refers to a nonexistent -subexpression in @samp{\(a\)\2} is invalid. - -@comment unfinished business - -@item REG_EEND -Returned when a regular expression causes no other more specific error. - -@item REG_EESCAPE -For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the -one in @samp{\}. - -@comment kitchen sink -@item REG_BADPAT -For example, in the extended regular expression syntax, the empty group -@samp{()} in @samp{a()b} is invalid. - -@comment internal -@item REG_ESIZE -Returned when a regular expression needs a pattern buffer larger than -65536 bytes. - -@item REG_ESPACE -Returned when a regular expression causes Regex to run out of memory. - -@end table - - -@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions -@subsection POSIX Matching - -Matching the @sc{posix} way means trying to match a null-terminated -string starting at its first character. Once you've compiled a pattern -into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you -can ask the matcher to match that pattern against a string using: - -@findex regexec -@example -int -regexec (const regex_t *@var{preg}, const char *@var{string}, - size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags}) -@end example - -@noindent -@var{preg} is the address of a pattern buffer for a compiled pattern. -@var{string} is the string you want to match. - -@xref{Using Byte Offsets}, for an explanation of @var{pmatch}. 
If you -pass zero for @var{nmatch} or you compiled @var{preg} with the -compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore -@var{pmatch}; otherwise, you must allocate it to have at least -@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte -offsets in @var{pmatch}, and set to @math{-1} any unused elements up to -@code{@var{pmatch}[@var{nmatch} - 1]}. - -@var{eflags} specifies @dfn{execution flags}---namely, the two bits -@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If -you set @code{REG_NOTBOL}, then the match-beginning-of-line operator -(@pxref{Match-beginning-of-line Operator}) always fails to match. -This lets you match against pieces of a line, as you would need to if, -say, searching for repeated instances of a given pattern in a line; it -would work correctly for patterns both with and without -match-beginning-of-line operators. @code{REG_NOTEOL} works analogously -for the match-end-of-line operator (@pxref{Match-end-of-line -Operator}); it exists for symmetry. - -@code{regexec} tries to find a match for @var{preg} in @var{string} -according to the syntax in @var{preg}'s @code{syntax} field. -(@xref{POSIX Regular Expression Compiling}, for how to set it.) The -function returns zero if the compiled pattern matches @var{string} and -@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't. - -@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions -@subsection Reporting Errors - -If either @code{regcomp} or @code{regexec} fail, they return a nonzero -error code, the possibilities for which are defined in @file{regex.h}. -@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for -what these codes mean. 
To get an error string corresponding to these -codes, you can use: - -@findex regerror -@example -size_t -regerror (int @var{errcode}, - const regex_t *@var{preg}, - char *@var{errbuf}, - size_t @var{errbuf_size}) -@end example - -@noindent -@var{errcode} is an error code, @var{preg} is the address of the pattern -buffer which provoked the error, @var{errbuf} is the error buffer, and -@var{errbuf_size} is @var{errbuf}'s size. - -@code{regerror} returns the size in bytes of the error string -corresponding to @var{errcode} (including its terminating null). If -@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in -@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the -error string, followed by a null. -@var{errbuf_size} must be a nonnegative number less than or equal to the -size in bytes of @var{errbuf}. - -You can call @code{regerror} with a null @var{errbuf} and a zero -@var{errbuf_size} to determine how large @var{errbuf} need be to -accommodate @code{regerror}'s error string. - -@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions -@subsection Using Byte Offsets - -In @sc{posix}, variables of type @code{regmatch_t} hold information -analogous, but not identical, to @sc{gnu}'s registers (@pxref{Using -Registers}). To get information about registers in @sc{posix}, pass to -@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t}, i.e., -the address of a structure of this type, defined in -@file{regex.h}: - -@tindex regmatch_t -@example -typedef struct -@{ - regoff_t rm_so; - regoff_t rm_eo; -@} regmatch_t; -@end example - -When reading in @ref{Using Registers}, about how the matching function -stores the information into the registers, substitute @var{pmatch} for -@var{regs}, @code{@w{@var{pmatch}[@var{i}].}rm_so} for -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{pmatch}[@var{i}].}rm_eo} for -@code{@w{@var{regs}->}end[@var{i}]}. 
- -@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions -@subsection Freeing POSIX Pattern Buffers - -To free any allocated fields of a pattern buffer, use: - -@findex regfree -@example -void -regfree (regex_t *@var{preg}) -@end example - -@noindent -@var{preg} is the pattern buffer whose allocated fields you want freed. -@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used} -fields to zero. After freeing a pattern buffer, you need to again -compile a regular expression in it (@pxref{POSIX Regular Expression -Compiling}) before passing it to the matching function (@pxref{POSIX -Matching}). - - -@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex -@section BSD Regex Functions - -If you're writing code that has to be Berkeley @sc{unix} compatible, -you'll need to use these functions whose interfaces are the same as those -in Berkeley @sc{unix}. - -@menu -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu - -@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions -@subsection BSD Regular Expression Compiling - -With Berkeley @sc{unix}, you can only search for a given regular -expression; you can't match one. To search for it, you must first -compile it. Before you compile it, you must indicate the regular -expression syntax you want it compiled according to by setting the -variable @code{re_syntax_options} (declared in @file{regex.h}) to some -syntax (@pxref{Regular Expression Syntax}). - -To compile a regular expression use: - -@findex re_comp -@example -char * -re_comp (char *@var{regex}) -@end example - -@noindent -@var{regex} is the address of a null-terminated regular expression. -@code{re_comp} uses an internal pattern buffer, so you can use only the -most recently compiled pattern buffer. 
This means that if you want to -use a given regular expression that you've already compiled---but it -isn't the latest one you've compiled---you'll have to recompile it. If -you call @code{re_comp} with the null string (@emph{not} the empty -string) as the argument, it doesn't change the contents of the pattern -buffer. - -If @code{re_comp} successfully compiles the regular expression, it -returns zero. If it can't compile the regular expression, it returns -an error string. @code{re_comp}'s error messages are identical to those -of @code{re_compile_pattern} (@pxref{GNU Regular Expression -Compiling}). - -@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions -@subsection BSD Searching - -Searching the Berkeley @sc{unix} way means searching in a string -starting at its first character and trying successive positions within -it to find a match. Once you've compiled a pattern using @code{re_comp} -(@pxref{BSD Regular Expression Compiling}), you can ask Regex -to search for that pattern in a string using: - -@findex re_exec -@example -int -re_exec (char *@var{string}) -@end example - -@noindent -@var{string} is the address of the null-terminated string in which you -want to search. - -@code{re_exec} returns either 1 for success or 0 for failure. It -automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}). - - -@node Copying, Index, Programming with Regex, Top -@appendix GNU GENERAL PUBLIC LICENSE -@center Version 2, June 1991 - -@display -Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. -675 Mass Ave, Cambridge, MA 02139, USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display - -@unnumberedsec Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. 
By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software---to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. 
If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - -@iftex -@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end iftex -@ifinfo -@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end ifinfo - -@enumerate -@item -This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The ``Program'', below, -refers to any such program or work, and a ``work based on the Program'' -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term ``modification''.) Each licensee is addressed as ``you''. - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. 
- -@item -You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - -@item -You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - -@enumerate a -@item -You must cause the modified files to carry prominent notices -stating that you changed the files and the date of any change. - -@item -You must cause any work that you distribute or publish, that in -whole or in part contains or is derived from the Program or any -part thereof, to be licensed as a whole at no charge to all third -parties under the terms of this License. - -@item -If the modified program normally reads commands interactively -when run, you must cause it, when started running for such -interactive use in the most ordinary way, to print or display an -announcement including an appropriate copyright notice and a -notice that there is no warranty (or else, saying that you provide -a warranty) and that users may redistribute the program under -these conditions, and telling the user how to view a copy of this -License. (Exception: if the Program itself is interactive but -does not normally print such an announcement, your work based on -the Program is not required to print an announcement.) -@end enumerate - -These requirements apply to the modified work as a whole. 
If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - -@item -You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - -@enumerate a -@item -Accompany it with the complete corresponding machine-readable -source code, which must be distributed under the terms of Sections -1 and 2 above on a medium customarily used for software interchange; or, - -@item -Accompany it with a written offer, valid for at least three -years, to give any third party, for a charge no more than your -cost of physically performing source distribution, a complete -machine-readable copy of the corresponding source code, to be -distributed under the terms of Sections 1 and 2 above on a medium -customarily used for software interchange; or, - -@item -Accompany it with the information you received as to the offer -to distribute corresponding source code. 
(This alternative is -allowed only for noncommercial distribution and only if you -received the program in object code or executable form with such -an offer, in accord with Subsection b above.) -@end enumerate - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - -@item -You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - -@item -You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. 
Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - -@item -Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - -@item -If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - -@item -If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - -@item -The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and ``any -later version'', you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. 
- -@item -If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - -@iftex -@heading NO WARRANTY -@end iftex -@ifinfo -@center NO WARRANTY -@end ifinfo - -@item -BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - -@item -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. 
-@end enumerate - -@iftex -@heading END OF TERMS AND CONDITIONS -@end iftex -@ifinfo -@center END OF TERMS AND CONDITIONS -@end ifinfo - -@page -@unnumberedsec Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. - -@smallexample -@var{one line to give the program's name and a brief idea of what it does.} -Copyright (C) 19@var{yy} @var{name of author} - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -@end smallexample - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - -@smallexample -Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} -Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
-This is free software, and you are welcome to redistribute it -under certain conditions; type `show c' for details. -@end smallexample - -The hypothetical commands @samp{show w} and @samp{show c} should show -the appropriate parts of the General Public License. Of course, the -commands you use may be called something other than @samp{show w} and -@samp{show c}; they could even be mouse-clicks or menu items---whatever -suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a ``copyright disclaimer'' for the program, if -necessary. Here is a sample; alter the names: - -@example -Yoyodyne, Inc., hereby disclaims all copyright interest in the program -`Gnomovision' (which makes passes at compilers) written by James Hacker. - -@var{signature of Ty Coon}, 1 April 1989 -Ty Coon, President of Vice -@end example - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. - - -@node Index, , Copying, Top -@unnumbered Index - -@printindex cp - -@contents - -@bye diff --git a/regex-0.12/test/ChangeLog b/regex-0.12/test/ChangeLog @@ -1,77 +0,0 @@ -Thu Mar 25 21:23:43 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * debugmalloc.c: #include <string.h>, and remove declaration of - memcpy. - -Sun Dec 13 20:59:32 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * tregress.c (test_regress): Add regression test for matching - "[a-a]" against "a" with the upcase translation map. - - * iregex.c (print_regs): Don't print a newline after the register - contents. - (main): Instead, write out newlines here after printing match and - search results; this way, we get a newline whether or not the - pattern matched. 
- -Fri Dec 11 03:30:50 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * tregress.c (test_regress): Add new test to catch bug fixed by - change to regex.c today. - - * Makefile.in (dregex.o): Depend on `../regex.[ch]', not `regex.[ch]'. - -Sun Nov 15 07:51:40 1992 Karl Berry (karl@cs.umb.edu) - - * debugmalloc.c (memcpy): Declare; also, include <assert.h>. - - * psx-interf.c (fill_pmatch): Declare offsets as `regoff_t' - instead of `off_t'. - -Thu Nov 12 11:29:58 1992 Karl Berry (karl@cs.umb.edu) - - * iregex.c (main): Remove unused variable `c'; initialize - the char array in C code; only call print_regs if the match and - search succeeded. - (strlen): Declare. - - * tregress.c (test_regress): Bug from enami. - -Tue Nov 10 10:36:53 1992 Karl Berry (karl@cs.umb.edu) - - * tregress.c (test_regress): Remove Emacs 19 diff bug from rms, as - it was never the right thing to test anyway, and the test itself - had bugs in it. - -Mon Nov 9 10:09:40 1992 Karl Berry (karl@cs.umb.edu) - - * tregress.c (test_regress): Bug from meyering. - -Thu Sep 24 10:48:34 1992 Karl Berry (karl@cs.umb.edu) - - * Makefile.in: avoid $< (except in implicit rule). - -Sat Sep 19 15:38:29 1992 Karl Berry (karl@hayley) - - * Makefile.in (TAGS): include regex.c and regex.h. - -Wed Sep 16 09:29:27 1992 Karl Berry (karl@hayley) - - * xmalloc.c (xmalloc): use char *, not void *, as some compilers - bomb out on the latter. - - * Makefile.in (LOADLIBES): use LIBS instead, as that what's - Autoconf wants to define. - - * other.c: remove tests for ^/$ around newlines. - -Tue Sep 15 11:01:15 1992 Karl Berry (karl@hayley) - - * fileregex.c (main): call re_search_2 instead of re_search. - - * Makefile.in (regex.o): make target dregex.o, so VPATH doesn't - find ../regex.o. - -Sun Sep 13 06:50:03 1992 Karl Berry (karl@hayley) - - * Created. 
diff --git a/regex-0.12/test/TAGS b/regex-0.12/test/TAGS @@ -1,373 +0,0 @@ - -.././regex.c,4137 -#define AT_STRINGS_BEG(3078,98376 -#define AT_STRINGS_END(3079,98449 -#define AT_WORD_BOUNDARY(3093,99002 -#define BUF_PUSH(887,24995 -#define BUF_PUSH_2(895,25208 -#define BUF_PUSH_3(904,25437 -#define DEBUG_POP(2336,74614 -#define DEBUG_PRINT1(471,14296 -#define DEBUG_PRINT1(785,21263 -#define DEBUG_PRINT2(472,14342 -#define DEBUG_PRINT3(473,14398 -#define DEBUG_PRINT3(787,21316 -#define DEBUG_PRINT4(474,14462 -#define DEBUG_PRINT_COMPILED_PATTERN(475,14534 -#define DEBUG_PRINT_COMPILED_PATTERN(789,21386 -#define DEBUG_PRINT_DOUBLE_STRING(477,14637 -#define DEBUG_PUSH(2338,74684 -#define DEBUG_STATEMENT(470,14267 -#define DOUBLE_FAIL_STACK(2299,73230 -#define EVER_MATCHED_SOMETHING(3028,96680 -#define EXTEND_BUFFER(941,26834 -#define EXTRACT_NUMBER(403,12499 -#define EXTRACT_NUMBER(422,12960 -#define EXTRACT_NUMBER_AND_INCR(430,13181 -#define EXTRACT_NUMBER_AND_INCR(448,13583 -#define FAIL_STACK_EMPTY(2271,72289 -#define FAIL_STACK_FULL(2273,72404 -#define FAIL_STACK_PTR_EMPTY(2272,72344 -#define FAIL_STACK_TOP(2274,72473 -#define FIRST_STRING_P(221,5848 -#define FREE_VAR(3100,99186 -#define FREE_VARIABLES(3101,99240 -#define FREE_VARIABLES(3116,99751 -#define GET_BUFFER_SPACE(882,24802 -#define GET_UNSIGNED_NUMBER(1017,29312 -#define INIT_FAIL_STACK(2279,72612 -#define INSERT_JUMP(923,26079 -#define INSERT_JUMP2(927,26236 -#define ISALNUM(147,3407 -#define ISALPHA(148,3455 -#define ISBLANK(135,3062 -#define ISBLANK(137,3116 -#define ISCNTRL(149,3503 -#define ISDIGIT(146,3359 -#define ISGRAPH(140,3185 -#define ISGRAPH(142,3239 -#define ISLOWER(150,3551 -#define ISPRINT(145,3311 -#define ISPUNCT(151,3599 -#define ISSPACE(152,3647 -#define ISUPPER(153,3695 -#define ISXDIGIT(154,3743 -#define IS_ACTIVE(3026,96578 -#define IS_CHAR_CLASS(1035,29793 -#define MATCHED_SOMETHING(3027,96621 -#define MAX(233,6292 -#define MIN(234,6334 -#define PATFETCH(852,23769 -#define 
PATFETCH_RAW(860,24020 -#define POINTER_TO_OFFSET(3050,97433 -#define POP_FAILURE_ITEM(2331,74426 -#define POP_FAILURE_POINT(2461,79538 -#define PREFETCH(3064,97916 -#define PUSH_FAILURE_ITEM(2327,74253 -#define PUSH_FAILURE_POINT(2352,75048 -#define PUSH_PATTERN_OP(2317,73841 -#define REGEX_REALLOCATE(185,4875 -#define REGEX_REALLOCATE(210,5495 -#define REGEX_TALLOC(227,6137 -#define REG_MATCH_NULL_STRING_P(3025,96511 -#define REG_UNSET(3055,97649 -#define RETALLOC(226,6058 -#define SET_LIST_BIT(1011,29089 -#define SET_REGS_MATCHED(3034,96936 -#define SIGN_EXTEND_CHAR(166,4109 -#define SIGN_EXTEND_CHAR(169,4217 -#define STORE_JUMP(915,25800 -#define STORE_JUMP2(919,25917 -#define STORE_NUMBER(384,11919 -#define STORE_NUMBER_AND_INCR(394,12242 -#define STREQ(231,6244 -#define SYNTAX(120,2790 -#define TALLOC(225,6003 -#define TRANSLATE(873,24503 -#define WORDCHAR_P(3086,98755 -alt_match_null_string_p 4466,149039 -#define assert(782,21217 -at_begline_loc_p 2131,67979 -at_endline_loc_p 2150,68557 -#define bcmp(54,1656 -bcmp_translate 4591,151831 -#define bcopy(57,1726 -typedef char boolean;236,6377 -#define bzero(60,1793 -common_op_match_null_string_p 4503,149895 -compile_range 2200,69997 -} compile_stack_elt_t;990,28602 -} compile_stack_type;998,28748 -extract_number 411,12714 -extract_number_and_incr 438,13370 -} fail_stack_type;2269,72269 -group_in_compile_stack 2172,69174 -group_match_null_string_p 4357,145267 -init_syntax_once 94,2365 -insert_op1 2091,67107 -insert_op2 2110,67475 -#define isascii(131,3018 -typedef int pattern_offset_t;981,28388 -print_compiled_pattern 726,19792 -print_double_string 753,20605 -print_fastmap 486,14835 -print_partial_compiled_pattern 518,15475 -re_comp 4650,153479 -re_compile_fastmap 2532,82428 -re_compile_pattern 4617,152520 -re_exec 4688,154373 -re_match 3136,100557 -re_match_2 3161,101399 -} re_opcode_t;378,11781 -re_search 2844,90872 -re_search_2 2877,91998 -re_set_registers 2817,90247 -re_set_syntax 808,22087 -regcomp 
4736,155972 -regerror 4876,160188 -regex_compile 1062,30922 -regexec 4811,158371 -regfree 4920,161247 -} register_info_type;3023,96488 -typedef unsigned regnum_t;974,28172 -store_op1 2063,66535 -store_op2 2076,66768 -typedef const unsigned 2262,72103 - -.././regex.h,230 -#define _RE_ARGS(394,14981 -#define _RE_ARGS(398,15036 -} reg_errcode_t;270,10874 -typedef unsigned reg_syntax_t;38,1503 -typedef struct re_pattern_buffer regex_t;346,13556 -} regmatch_t;382,14634 -typedef int regoff_t;354,13814 - -getpagesize.h,84 -#define getpagesize(12,137 -#define getpagesize(15,191 -#define getpagesize(20,302 - -test.h,436 -#define BRACES_TO_OPS(107,3169 -#define INVALID_PATTERN(110,3328 -#define MATCH_SELF(114,3429 -#define PARENS_TO_OPS(108,3248 -#define SAFE_STRLEN(14,201 -#define TEST_POSITIONED_MATCH(116,3470 -#define TEST_REGISTERS(104,3011 -#define TEST_REGISTERS_2(97,2703 -#define TEST_SEARCH(127,3875 -#define TEST_SEARCH_2(123,3720 -#define TEST_TRUNCATED_MATCH(120,3608 -typedef enum { false = 0, true = 1 } boolean;16,255 -} test_type;33,572 - -alloca.c,128 -alloca 141,3996 -find_stack_direction 85,2553 -} header;127,3538 -typedef void *pointer;51,1721 -typedef char *pointer;53,1778 - -bsd-interf.c,51 -test_berk_search 8,106 -test_bsd_interface 33,738 - -debugmalloc.c,395 -#define TRACE(8,143 -#define TRACE1(9,197 -#define TRACE2(10,254 -#define TRACE3(11,319 -#define TRACE4(12,392 -#define USER_ALLOC(61,1440 -typedef char *address;15,480 -} *chunk;54,1225 -chunk_delete 115,2778 -chunk_insert 96,2294 -chunk_to_mem 79,1916 -free 261,5604 -free_list_available 175,3947 -malloc 203,4343 -mem_to_chunk 68,1703 -realloc 242,5309 -validate_list 153,3478 -xsbrk 21,545 - -emacsmalloc.c,574 -#define ASSERT(178,5884 -#define ASSERT(181,5985 -#define CHAIN(166,5430 -#define bcmp(73,2821 -#define bcopy(72,2777 -#define bzero(74,2868 -calloc 603,15983 -free 484,13255 -get_lim_data 736,18517 -get_lim_data 752,18767 -get_lim_data 759,18860 -getpool 374,10263 -malloc 413,11133 
-malloc_init 218,6863 -malloc_mem_free 707,17940 -malloc_mem_used 688,17683 -malloc_stats 663,17320 -malloc_usable_size 233,7147 -memalign 618,16164 -morecore 244,7380 -realloc 541,14424 -#define start_of_data(110,3486 -#define start_of_data(115,3546 -sys_sbrk 815,20804 -valloc 645,17031 - -fileregex.c,13 -main 11,156 - -g++malloc.c,1543 -#define UPDATE_STATS(33,1090 -#define UPDATE_STATS(35,1131 -static inline int aligned_OK(343,11189 -void* calloc(1039,28692 -void cfree(1048,28894 -static inline void* chunk2mem(619,19336 -#define clear_inuse(592,18767 -static inline void consollink(716,21398 -static void do_free_stats(544,18016 -static void do_malloc_stats(534,17741 -766,22304 -extern 762,22235 - for 1260,34165 -void free(1028,28553 -static inline void frontlink(732,21717 -static unsigned int gcd(557,18251 - if 1212,32427 - if 1216,32582 - if 1220,32737 - if 1224,32880 - if 1229,33094 - if 1233,33251 - if 1238,33463 - if 1242,33609 - if 1247,33739 -#define inuse(590,18680 -static inline unsigned int lcm(580,18540 -void* malloc(939,26370 -static mchunkptr malloc_find_space(858,24561 -void malloc_stats(1201,32256 -unsigned int malloc_usable_size(1054,28936 -static volatile void malloc_user_error(286,9757 -static void malloc_user_error(288,9804 -typedef struct malloc_bin* mbinptr;320,10636 -typedef struct malloc_chunk* mchunkptr;309,10247 -static inline mchunkptr mem2chunk(643,19759 -void* memalign(1118,30363 -#define next_chunk(600,18910 -#define prev_chunk(604,19023 -void* realloc(1071,29263 -static inline unsigned int request2size(335,10993 -mchunkptr sanity_check(628,19486 -#define set_inuse(591,18723 -static inline void set_size(609,19149 -static inline mbinptr size2bin(499,16914 -static inline void split(685,20463 -static 768,22312 -static inline void unlink(671,20263 -void* valloc(1194,32107 -typedef volatile void 760,22184 -764,22271 - -iregex.c,54 -main 20,390 -print_regs 141,2638 -scanstring 87,1839 - -main.c,13 -main 12,242 - -malloc-test.c,112 -#define 
BITS_BLOCK(12,168 -#define BITS_MASK(13,228 -} bits_list_type;6,56 -init_bits_list 16,311 -main(32,621 - -other.c,18 -test_others 6,96 - -printchar.c,15 -printchar 2,5 - -psx-basic.c,23 -test_posix_basic 7,84 - -psx-extend.c,26 -test_posix_extended 7,88 - -psx-generic.c,26 -test_posix_generic 8,117 - -psx-group.c,20 -test_grouping 7,92 - -psx-interf.c,416 -fill_pmatch 174,4802 -get_error_string 18,260 -init_pattern_buffer 49,1434 -test_compile 67,1925 -test_eflags 245,6876 -test_error_code_allocation 562,16619 -test_error_code_message 524,15247 -test_ignore_case 303,8525 -test_newline 330,9199 -test_nsub 117,3319 -test_pmatch 188,5121 -test_posix_interface 614,18719 -test_posix_match 359,9938 -test_regcomp 138,3725 -test_regerror 592,17621 -test_regexec 394,10783 - -psx-interv.c,21 -test_intervals 6,93 - -test.c,607 -#define SET_FASTMAP(447,13999 -#define bcmp(18,362 -#define bcopy(19,415 -#define bzero(20,473 -compile_and_print_pattern 666,19653 -concat 97,2673 -delimiters_to_ops 571,17477 -general_test 115,2996 -invalid_pattern 542,16821 -#define memcmp(26,611 -#define memcpy(27,660 -print_pattern_info 635,18998 -set_all_registers 58,1390 -test_all_registers 506,15567 -test_case_fold 682,19993 -test_fastmap 460,14363 -test_fastmap_search 474,14668 -test_match 776,22235 -test_match_2 766,22040 -test_match_n_times 715,20798 -test_search_return 408,13011 -valid_nonposix_pattern 646,19239 -valid_pattern 557,17182 - -tregress.c,208 -#define SIMPLE_MATCH(74,1463 -#define SIMPLE_NONMATCH(75,1528 -do_match 78,1599 -itoa 10,199 -simple_compile 44,882 -simple_fail 21,353 -simple_fastmap 55,1115 -simple_search 100,2020 -test_regress 124,2513 - -upcase.c,0 - -xmalloc.c,14 -xmalloc 9,87 diff --git a/regex-0.12/test/alloca.c b/regex-0.12/test/alloca.c @@ -1,194 +0,0 @@ -/* - alloca -- (mostly) portable public-domain implementation -- D A Gwyn - - last edit: 86/05/30 rms - include config.h, since on VMS it renames some symbols. - Use xmalloc instead of malloc. 
- - This implementation of the PWB library alloca() function, - which is used to allocate space off the run-time stack so - that it is automatically reclaimed upon procedure exit, - was inspired by discussions with J. Q. Johnson of Cornell. - - It should work under any C implementation that uses an - actual procedure stack (as opposed to a linked list of - frames). There are some preprocessor constants that can - be defined when compiling for your specific system, for - improved efficiency; however, the defaults should be okay. - - The general concept of this implementation is to keep - track of all alloca()-allocated blocks, and reclaim any - that are found to be deeper in the stack than the current - invocation. This heuristic does not reclaim storage as - soon as it becomes invalid, but it will do so eventually. - - As a special case, alloca(0) reclaims storage without - allocating any. It is a good idea to use alloca(0) in - your main control loop, etc. to force garbage collection. -*/ -#ifndef lint -static char SCCSid[] = "@(#)alloca.c 1.1"; /* for the "what" utility */ -#endif - -#ifdef emacs -#include "config.h" -#ifdef static -/* actually, only want this if static is defined as "" - -- this is for usg, in which emacs must undefine static - in order to make unexec workable - */ -#ifndef STACK_DIRECTION -you -lose --- must know STACK_DIRECTION at compile-time -#endif /* STACK_DIRECTION undefined */ -#endif /* static */ -#endif /* emacs */ - -#ifndef alloca /* If compiling with GCC, this file's not needed. */ - -#ifdef __STDC__ -typedef void *pointer; /* generic pointer type */ -#else -typedef char *pointer; /* generic pointer type */ -#endif - -#define NULL 0 /* null pointer constant */ - -extern void free(); -extern pointer xmalloc(); - -/* - Define STACK_DIRECTION if you know the direction of stack - growth for your system; otherwise it will be automatically - deduced at run-time. 
- - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown -*/ - -#ifndef STACK_DIRECTION -#define STACK_DIRECTION 0 /* direction unknown */ -#endif - -#if STACK_DIRECTION != 0 - -#define STACK_DIR STACK_DIRECTION /* known at compile-time */ - -#else /* STACK_DIRECTION == 0; need run-time code */ - -static int stack_dir; /* 1 or -1 once known */ -#define STACK_DIR stack_dir - -static void -find_stack_direction (/* void */) -{ - static char *addr = NULL; /* address of first - `dummy', once known */ - auto char dummy; /* to get stack address */ - - if (addr == NULL) - { /* initial entry */ - addr = &dummy; - - find_stack_direction (); /* recurse once */ - } - else /* second entry */ - if (&dummy > addr) - stack_dir = 1; /* stack grew upward */ - else - stack_dir = -1; /* stack grew downward */ -} - -#endif /* STACK_DIRECTION == 0 */ - -/* - An "alloca header" is used to: - (a) chain together all alloca()ed blocks; - (b) keep track of stack depth. - - It is very important that sizeof(header) agree with malloc() - alignment chunk size. The following default should work okay. -*/ - -#ifndef ALIGN_SIZE -#define ALIGN_SIZE sizeof(double) -#endif - -typedef union hdr -{ - char align[ALIGN_SIZE]; /* to force sizeof(header) */ - struct - { - union hdr *next; /* for chaining headers */ - char *deep; /* for stack depth measure */ - } h; -} header; - -/* - alloca( size ) returns a pointer to at least `size' bytes of - storage which will be automatically reclaimed upon exit from - the procedure that called alloca(). Originally, this space - was supposed to be taken from the current stack frame of the - caller, but that method cannot be made to work for some - implementations of C, for example under Gould's UTX/32. 
-*/ - -static header *last_alloca_header = NULL; /* -> last alloca header */ - -pointer -alloca (size) /* returns pointer to storage */ - unsigned size; /* # bytes to allocate */ -{ - auto char probe; /* probes stack depth: */ - register char *depth = &probe; - -#if STACK_DIRECTION == 0 - if (STACK_DIR == 0) /* unknown growth direction */ - find_stack_direction (); -#endif - - /* Reclaim garbage, defined as all alloca()ed storage that - was allocated from deeper in the stack than currently. */ - - { - register header *hp; /* traverses linked list */ - - for (hp = last_alloca_header; hp != NULL;) - if ((STACK_DIR > 0 && hp->h.deep > depth) - || (STACK_DIR < 0 && hp->h.deep < depth)) - { - register header *np = hp->h.next; - - free ((pointer) hp); /* collect garbage */ - - hp = np; /* -> next header */ - } - else - break; /* rest are not deeper */ - - last_alloca_header = hp; /* -> last valid storage */ - } - - if (size == 0) - return NULL; /* no allocation required */ - - /* Allocate combined header + user data storage. */ - - { - register pointer new = xmalloc (sizeof (header) + size); - /* address of header */ - - ((header *)new)->h.next = last_alloca_header; - ((header *)new)->h.deep = depth; - - last_alloca_header = (header *)new; - - /* User storage begins just after header. */ - - return (pointer)((char *)new + sizeof(header)); - } -} - -#endif /* no alloca */ diff --git a/regex-0.12/test/bsd-interf.c b/regex-0.12/test/bsd-interf.c @@ -1,38 +0,0 @@ -/* bsd-interf.c: test BSD interface. 
*/ - -#ifndef _POSIX_SOURCE /* whole file */ - -#include "test.h" - -void -test_berk_search (pattern, string) - const char *pattern; - char *string; -{ - const char *return_value = re_comp (pattern); - - if (return_value != 0) - { - printf ("This didn't compile: `%s'.\n", pattern); - printf (" The error message was: `%s'.\n", return_value); - } - else - if (test_should_match && re_exec (string) != strlen (string)) - { - printf ("Should have matched but didn't:\n"); - printf (" The pattern was: %s.\n", pattern); - if (string) - printf (" The string was: `%s'.'n", string); - else - printf (" The string was empty.\n"); - } -} - - -void -test_bsd_interface () -{ - test_berk_search ("a", "ab"); -} - -#endif /* _POSIX_SOURCE */ diff --git a/regex-0.12/test/debugmalloc.c b/regex-0.12/test/debugmalloc.c @@ -1,273 +0,0 @@ -/* debugmalloc.c: a malloc for debugging purposes. */ - -#include <stdio.h> -#include <assert.h> -#include <string.h> - -static unsigned trace = 0; -#define TRACE(s) if (trace) fprintf (stderr, "%s", s) -#define TRACE1(s, e1) if (trace) fprintf (stderr, s, e1) -#define TRACE2(s, e1, e2) if (trace) fprintf (stderr, s, e1, e2) -#define TRACE3(s, e1, e2, e3) if (trace) fprintf (stderr, s, e1, e2, e3) -#define TRACE4(s, e1, e2, e3, e4) \ - if (trace) fprintf (stderr, s, e1, e2, e3, e4) - -typedef char *address; - - -/* Wrap our calls to sbrk. */ - -address -xsbrk (incr) - int incr; -{ - extern char *sbrk (); - address ret = sbrk (incr); - - if (ret == (address) -1) - { - perror ("sbrk"); /* Actually, we should return NULL, not quit. */ - abort (); - } - - return ret; -} - - - -typedef struct chunk_struct -{ - /* This is the size (in bytes) that has actually been actually - allocated, not the size that the user requested. */ - unsigned alloc_size; - - /* This is the size the user requested. */ - unsigned user_size; - - /* Points to the next block in one of the lists. */ - struct chunk_struct *next; - - /* Now comes the user's memory. 
*/ - address user_mem; - - /* After the user's memory is a constant. */ -} *chunk; - -#define MALLOC_OVERHEAD 16 - -/* We might play around with the `user_size' field, but the amount of - memory that is actually available in the chunk is always the size - allocated minus the overhead. */ -#define USER_ALLOC(c) ((c)->alloc_size - MALLOC_OVERHEAD) - -/* Given a pointer to a malloc-allocated block, the beginning of the - chunk should always be MALLOC_OVERHEAD - 4 bytes back, since the only - overhead after the user memory is the constant. */ - -chunk -mem_to_chunk (mem) - address mem; -{ - return (chunk) (mem - (MALLOC_OVERHEAD - 4)); -} - - -/* The other direction is even easier, since the user's memory starts at - the `user_mem' member in the chunk. */ - -address -chunk_to_mem (c) - chunk c; -{ - return (address) &(c->user_mem); -} - - - -/* We keep both all the allocated chunks and all the free chunks on - lists. Since we put the next pointers in the chunk structure, we - don't need a separate chunk_list structure. */ -chunk alloc_list = NULL, free_list = NULL; - - -/* We always append the new chunk at the beginning of the list. */ - -void -chunk_insert (chunk_list, new_c) - chunk *chunk_list; - chunk new_c; -{ - chunk c = *chunk_list; /* old beginning of list */ - - TRACE3 (" Inserting 0x%x at the beginning of 0x%x, before 0x%x.\n", - new_c, chunk_list, c); - - *chunk_list = new_c; - new_c->next = c; -} - - -/* Thus, removing an element means we have to search until we find it. - Have to delete before we insert, since insertion changes the next - pointer, which we need to put it on the other list. 
*/ - -void -chunk_delete (chunk_list, dead_c) - chunk *chunk_list; - chunk dead_c; -{ - chunk c = *chunk_list; - chunk prev_c = NULL; - - TRACE2 (" Deleting 0x%x from 0x%x:", dead_c, chunk_list); - - while (c != dead_c && c != NULL) - { - TRACE1 (" 0x%x", c); - prev_c = c; - c = c->next; - } - - if (c == NULL) - { - fprintf (stderr, "Chunk at 0x%x not found on list.\n", dead_c); - abort (); - } - - if (prev_c == NULL) - { - TRACE1 (".\n Setting head to 0x%x.\n", c->next); - *chunk_list = c->next; - } - else - { - TRACE2 (".\n Linking next(0x%x) to 0x%x.\n", prev_c, c->next); - prev_c->next = c->next; - } -} - - -/* See if a list is hunky-dory. */ - -void -validate_list (chunk_list) - chunk *chunk_list; -{ - chunk c; - - TRACE1 (" Validating list at 0x%x:", chunk_list); - - for (c = *chunk_list; c != NULL; c = c->next) - { - assert (c->user_size < c->alloc_size); - assert (memcmp (chunk_to_mem (c) + c->user_size, "Karl", 4)); - TRACE2 (" 0x%x/%d", c, c->user_size); - } - - TRACE (".\n"); -} - - -/* See if we have a free chunk of a given size. We'll take the first - one that is big enough. */ - -chunk -free_list_available (needed) - unsigned needed; -{ - chunk c; - - TRACE1 (" Checking free list for %d bytes:", needed); - - if (free_list == NULL) - { - return NULL; - } - - c = free_list; - - while (c != NULL && USER_ALLOC (c) < needed) - { - TRACE2 (" 0x%x/%d", c, USER_ALLOC (c)); - c = c->next; - } - - TRACE1 ("\n Returning 0x%x.\n", c); - return c; -} - - - - -address -malloc (n) - unsigned n; -{ - address new_mem; - chunk c; - - TRACE1 ("Mallocing %d bytes.\n", n); - - validate_list (&free_list); - validate_list (&alloc_list); - - c = free_list_available (n); - - if (c == NULL) - { /* Nothing suitable on free list. Allocate a new chunk. */ - TRACE (" not on free list.\n"); - c = (chunk) xsbrk (n + MALLOC_OVERHEAD); - c->alloc_size = n + MALLOC_OVERHEAD; - } - else - { /* Found something on free list. Don't split it, just use as is. 
*/ - TRACE (" found on free list.\n"); - chunk_delete (&free_list, c); - } - - /* If we took this from the free list, then the user size might be - different now, and consequently the constant at the end might be in - the wrong place. */ - c->user_size = n; - new_mem = chunk_to_mem (c); - memcpy (new_mem + n, "Karl", 4); - chunk_insert (&alloc_list, c); - - TRACE2 ("Malloc returning 0x%x (chunk 0x%x).\n", new_mem, c); - return new_mem; -} - - -address -realloc (mem, n) - address mem; - unsigned n; -{ - void free (); - chunk c = mem_to_chunk (mem); - address new_mem; - - TRACE3 ("Reallocing %d bytes at 0x%x (chunk 0x%x).\n", n, mem, c); - - new_mem = malloc (n); - memcpy (new_mem, mem, c->user_size); - free (mem); - - return new_mem; -} - - -void -free (mem) - address mem; -{ - chunk c = mem_to_chunk (mem); - - TRACE2 ("Freeing memory at 0x%x (chunk at 0x%x).\n", mem, c); - - validate_list (&free_list); - validate_list (&alloc_list); - - chunk_delete (&alloc_list, c); - chunk_insert (&free_list, c); -} diff --git a/regex-0.12/test/emacsmalloc.c b/regex-0.12/test/emacsmalloc.c @@ -1,844 +0,0 @@ -/* dynamic memory allocation for GNU. - Copyright (C) 1985, 1987 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -In other words, you are welcome to use, share and improve this program. 
-You are forbidden to forbid anyone else to use, share and improve -what you give them. Help stamp out software-hoarding! */ - - -/* - * @(#)nmalloc.c 1 (Caltech) 2/21/82 - * - * U of M Modified: 20 Jun 1983 ACT: strange hacks for Emacs - * - * Nov 1983, Mike@BRL, Added support for 4.1C/4.2 BSD. - * - * This is a very fast storage allocator. It allocates blocks of a small - * number of different sizes, and keeps free lists of each size. Blocks - * that don't exactly fit are passed up to the next larger size. In this - * implementation, the available sizes are (2^n)-4 (or -16) bytes long. - * This is designed for use in a program that uses vast quantities of - * memory, but bombs when it runs out. To make it a little better, it - * warns the user when he starts to get near the end. - * - * June 84, ACT: modified rcheck code to check the range given to malloc, - * rather than the range determined by the 2-power used. - * - * Jan 85, RMS: calls malloc_warning to issue warning on nearly full. - * No longer Emacs-specific; can serve as all-purpose malloc for GNU. - * You should call malloc_init to reinitialize after loading dumped Emacs. - * Call malloc_stats to get info on memory stats if MSTATS turned on. - * realloc knows how to return same block given, just changing its size, - * if the power of 2 is correct. - */ - -/* - * nextf[i] is the pointer to the next free block of size 2^(i+3). The - * smallest allocatable block is 8 bytes. The overhead information will - * go in the first int of the block, and the returned pointer will point - * to the second. - * -#ifdef MSTATS - * nmalloc[i] is the difference between the number of mallocs and frees - * for a given block size. -#endif MSTATS - */ - -#ifdef emacs -/* config.h specifies which kind of system this is. */ -#include "config.h" -#include <signal.h> -#else - -/* Determine which kind of system this is. 
*/ -#include <sys/types.h> -#include <signal.h> - -#include <string.h> -#define bcopy(s,d,n) memcpy ((d), (s), (n)) -#define bcmp(s1,s2,n) memcmp ((s1), (s2), (n)) -#define bzero(s,n) memset ((s), 0, (n)) - -#ifndef SIGTSTP -#ifndef VMS -#ifndef USG -#define USG -#endif -#endif /* not VMS */ -#else /* SIGTSTP */ -#ifdef SIGIO -#define BSD4_2 -#endif /* SIGIO */ -#endif /* SIGTSTP */ - -#endif /* not emacs */ - -/* Define getpagesize () if the system does not. */ -#include "getpagesize.h" - -#ifdef BSD -#ifdef BSD4_1 -#include <sys/vlimit.h> /* warn the user when near the end */ -#else /* if 4.2 or newer */ -#include <sys/time.h> -#include <sys/resource.h> -#endif /* if 4.2 or newer */ -#endif - -#ifdef VMS -#include "vlimit.h" -#endif - -extern char *start_of_data (); - -#ifdef BSD -#ifndef DATA_SEG_BITS -#define start_of_data() &etext -#endif -#endif - -#ifndef emacs -#define start_of_data() &etext -#endif - -#define ISALLOC ((char) 0xf7) /* magic byte that implies allocation */ -#define ISFREE ((char) 0x54) /* magic byte that implies free block */ - /* this is for error checking only */ -#define ISMEMALIGN ((char) 0xd6) /* Stored before the value returned by - memalign, with the rest of the word - being the distance to the true - beginning of the block. */ - -extern char etext; - -/* These two are for user programs to look at, when they are interested. 
*/ - -unsigned int malloc_sbrk_used; /* amount of data space used now */ -unsigned int malloc_sbrk_unused; /* amount more we can have */ - -/* start of data space; can be changed by calling init_malloc */ -static char *data_space_start; - -#ifdef MSTATS -static int nmalloc[30]; -static int nmal, nfre; -#endif /* MSTATS */ - -/* If range checking is not turned on, all we have is a flag indicating - whether memory is allocated, an index in nextf[], and a size field; to - realloc() memory we copy either size bytes or 1<<(index+3) bytes depending - on whether the former can hold the exact size (given the value of - 'index'). If range checking is on, we always need to know how much space - is allocated, so the 'size' field is never used. */ - -struct mhead { - char mh_alloc; /* ISALLOC or ISFREE */ - char mh_index; /* index in nextf[] */ -/* Remainder are valid only when block is allocated */ - unsigned short mh_size; /* size, if < 0x10000 */ -#ifdef rcheck - unsigned mh_nbytes; /* number of bytes allocated */ - int mh_magic4; /* should be == MAGIC4 */ -#endif /* rcheck */ -}; - -/* Access free-list pointer of a block. - It is stored at block + 4. - This is not a field in the mhead structure - because we want sizeof (struct mhead) - to describe the overhead for when the block is in use, - and we do not want the free-list pointer to count in that. */ - -#define CHAIN(a) \ - (*(struct mhead **) (sizeof (char *) + (char *) (a))) - -#ifdef rcheck - -/* To implement range checking, we write magic values in at the beginning and - end of each allocated block, and make sure they are undisturbed whenever a - free or a realloc occurs. 
*/ -/* Written in each of the 4 bytes following the block's real space */ -#define MAGIC1 0x55 -/* Written in the 4 bytes before the block's real space */ -#define MAGIC4 0x55555555 -#define ASSERT(p) if (!(p)) botch("p"); else -#define EXTRA 4 /* 4 bytes extra for MAGIC1s */ -#else -#define ASSERT(p) if (!(p)) abort (); else -#define EXTRA 0 -#endif /* rcheck */ - - -/* nextf[i] is free list of blocks of size 2**(i + 3) */ - -static struct mhead *nextf[30]; - -/* busy[i] is nonzero while allocation of block size i is in progress. */ - -static char busy[30]; - -/* Number of bytes of writable memory we can expect to be able to get */ -static unsigned int lim_data; - -/* Level number of warnings already issued. - 0 -- no warnings issued. - 1 -- 75% warning already issued. - 2 -- 85% warning already issued. -*/ -static int warnlevel; - -/* Function to call to issue a warning; - 0 means don't issue them. */ -static void (*warnfunction) (); - -/* nonzero once initial bunch of free blocks made */ -static int gotpool; - -char *_malloc_base; - -static void getpool (); - -/* Cause reinitialization based on job parameters; - also declare where the end of pure storage is. */ -void -malloc_init (start, warnfun) - char *start; - void (*warnfun) (); -{ - if (start) - data_space_start = start; - lim_data = 0; - warnlevel = 0; - warnfunction = warnfun; -} - -/* Return the maximum size to which MEM can be realloc'd - without actually requiring copying. 
*/ - -int -malloc_usable_size (mem) - char *mem; -{ - struct mhead *p - = (struct mhead *) (mem - ((sizeof (struct mhead) + 7) & ~7)); - int blocksize = 8 << p->mh_index; - - return blocksize - sizeof (struct mhead) - EXTRA; -} - -static void -morecore (nu) /* ask system for more memory */ - register int nu; /* size index to get more of */ -{ - char *sbrk (); - register char *cp; - register int nblks; - register unsigned int siz; - int oldmask; - -#ifdef BSD -#ifndef BSD4_1 - int newmask = -1; - /* Blocking these signals interferes with debugging, at least on BSD on - the HP 9000/300. */ -#ifdef SIGTRAP - newmask &= ~(1 << SIGTRAP); -#endif -#ifdef SIGILL - newmask &= ~(1 << SIGILL); -#endif -#ifdef SIGTSTP - newmask &= ~(1 << SIGTSTP); -#endif -#ifdef SIGSTOP - newmask &= ~(1 << SIGSTOP); -#endif - oldmask = sigsetmask (newmask); -#endif -#endif - - if (!data_space_start) - { - data_space_start = start_of_data (); - } - - if (lim_data == 0) - get_lim_data (); - - /* On initial startup, get two blocks of each size up to 1k bytes */ - if (!gotpool) - { getpool (); getpool (); gotpool = 1; } - - /* Find current end of memory and issue warning if getting near max */ - -#ifndef VMS - /* Maximum virtual memory on VMS is difficult to calculate since it - * depends on several dynmacially changing things. Also, alignment - * isn't that important. That is why much of the code here is ifdef'ed - * out for VMS systems. 
- */ - cp = sbrk (0); - siz = cp - data_space_start; - - if (warnfunction) - switch (warnlevel) - { - case 0: - if (siz > (lim_data / 4) * 3) - { - warnlevel++; - (*warnfunction) ("Warning: past 75% of memory limit"); - } - break; - case 1: - if (siz > (lim_data / 20) * 17) - { - warnlevel++; - (*warnfunction) ("Warning: past 85% of memory limit"); - } - break; - case 2: - if (siz > (lim_data / 20) * 19) - { - warnlevel++; - (*warnfunction) ("Warning: past 95% of memory limit"); - } - break; - } - - if ((int) cp & 0x3ff) /* land on 1K boundaries */ - sbrk (1024 - ((int) cp & 0x3ff)); -#endif /* not VMS */ - - /* Take at least 2k, and figure out how many blocks of the desired size - we're about to get */ - nblks = 1; - if ((siz = nu) < 8) - nblks = 1 << ((siz = 8) - nu); - - if ((cp = sbrk (1 << (siz + 3))) == (char *) -1) - { -#ifdef BSD -#ifndef BSD4_1 - sigsetmask (oldmask); -#endif -#endif - return; /* no more room! */ - } - malloc_sbrk_used = siz; - malloc_sbrk_unused = lim_data - siz; - -#ifndef VMS - if ((int) cp & 7) - { /* shouldn't happen, but just in case */ - cp = (char *) (((int) cp + 8) & ~7); - nblks--; - } -#endif /* not VMS */ - - /* save new header and link the nblks blocks together */ - nextf[nu] = (struct mhead *) cp; - siz = 1 << (nu + 3); - while (1) - { - ((struct mhead *) cp) -> mh_alloc = ISFREE; - ((struct mhead *) cp) -> mh_index = nu; - if (--nblks <= 0) break; - CHAIN ((struct mhead *) cp) = (struct mhead *) (cp + siz); - cp += siz; - } - CHAIN ((struct mhead *) cp) = 0; - -#ifdef BSD -#ifndef BSD4_1 - sigsetmask (oldmask); -#endif -#endif -} - -static void -getpool () -{ - register int nu; - char * sbrk (); - register char *cp = sbrk (0); - - if ((int) cp & 0x3ff) /* land on 1K boundaries */ - sbrk (1024 - ((int) cp & 0x3ff)); - - /* Record address of start of space allocated by malloc. 
*/ - if (_malloc_base == 0) - _malloc_base = cp; - - /* Get 2k of storage */ - - cp = sbrk (04000); - if (cp == (char *) -1) - return; - - /* Divide it into an initial 8-word block - plus one block of size 2**nu for nu = 3 ... 10. */ - - CHAIN (cp) = nextf[0]; - nextf[0] = (struct mhead *) cp; - ((struct mhead *) cp) -> mh_alloc = ISFREE; - ((struct mhead *) cp) -> mh_index = 0; - cp += 8; - - for (nu = 0; nu < 7; nu++) - { - CHAIN (cp) = nextf[nu]; - nextf[nu] = (struct mhead *) cp; - ((struct mhead *) cp) -> mh_alloc = ISFREE; - ((struct mhead *) cp) -> mh_index = nu; - cp += 8 << nu; - } -} - -char * -malloc (n) /* get a block */ - unsigned n; -{ - register struct mhead *p; - register unsigned int nbytes; - register int nunits = 0; - - /* Figure out how many bytes are required, rounding up to the nearest - multiple of 8, then figure out which nestf[] area to use. - Both the beginning of the header and the beginning of the - block should be on an eight byte boundary. */ - nbytes = (n + ((sizeof *p + 7) & ~7) + EXTRA + 7) & ~7; - { - register unsigned int shiftr = (nbytes - 1) >> 2; - - while (shiftr >>= 1) - nunits++; - } - - /* In case this is reentrant use of malloc from signal handler, - pick a block size that no other malloc level is currently - trying to allocate. That's the easiest harmless way not to - interfere with the other level of execution. */ - while (busy[nunits]) nunits++; - busy[nunits] = 1; - - /* If there are no blocks of the appropriate size, go get some */ - /* COULD SPLIT UP A LARGER BLOCK HERE ... 
ACT */ - if (nextf[nunits] == 0) - morecore (nunits); - - /* Get one block off the list, and set the new list head */ - if ((p = nextf[nunits]) == 0) - { - busy[nunits] = 0; - return 0; - } - nextf[nunits] = CHAIN (p); - busy[nunits] = 0; - - /* Check for free block clobbered */ - /* If not for this check, we would gobble a clobbered free chain ptr */ - /* and bomb out on the NEXT allocate of this size block */ - if (p -> mh_alloc != ISFREE || p -> mh_index != nunits) -#ifdef rcheck - botch ("block on free list clobbered"); -#else /* not rcheck */ - abort (); -#endif /* not rcheck */ - - /* Fill in the info, and if range checking, set up the magic numbers */ - p -> mh_alloc = ISALLOC; -#ifdef rcheck - p -> mh_nbytes = n; - p -> mh_magic4 = MAGIC4; - { - /* Get the location n after the beginning of the user's space. */ - register char *m = (char *) p + ((sizeof *p + 7) & ~7) + n; - - *m++ = MAGIC1, *m++ = MAGIC1, *m++ = MAGIC1, *m = MAGIC1; - } -#else /* not rcheck */ - p -> mh_size = n; -#endif /* not rcheck */ -#ifdef MSTATS - nmalloc[nunits]++; - nmal++; -#endif /* MSTATS */ - return (char *) p + ((sizeof *p + 7) & ~7); -} - -free (mem) - char *mem; -{ - register struct mhead *p; - { - register char *ap = mem; - - if (ap == 0) - return; - - p = (struct mhead *) (ap - ((sizeof *p + 7) & ~7)); - if (p -> mh_alloc == ISMEMALIGN) - { - ap -= p->mh_size; - p = (struct mhead *) (ap - ((sizeof *p + 7) & ~7)); - } - -#ifndef rcheck - if (p -> mh_alloc != ISALLOC) - abort (); - -#else rcheck - if (p -> mh_alloc != ISALLOC) - { - if (p -> mh_alloc == ISFREE) - botch ("free: Called with already freed block argument\n"); - else - botch ("free: Called with bad argument\n"); - } - - ASSERT (p -> mh_magic4 == MAGIC4); - ap += p -> mh_nbytes; - ASSERT (*ap++ == MAGIC1); ASSERT (*ap++ == MAGIC1); - ASSERT (*ap++ == MAGIC1); ASSERT (*ap == MAGIC1); -#endif /* rcheck */ - } - { - register int nunits = p -> mh_index; - - ASSERT (nunits <= 29); - p -> mh_alloc = ISFREE; - - /* 
Protect against signal handlers calling malloc. */ - busy[nunits] = 1; - /* Put this block on the free list. */ - CHAIN (p) = nextf[nunits]; - nextf[nunits] = p; - busy[nunits] = 0; - -#ifdef MSTATS - nmalloc[nunits]--; - nfre++; -#endif /* MSTATS */ - } -} - -char * -realloc (mem, n) - char *mem; - register unsigned n; -{ - register struct mhead *p; - register unsigned int tocopy; - register unsigned int nbytes; - register int nunits; - - if (mem == 0) - return malloc (n); - p = (struct mhead *) (mem - ((sizeof *p + 7) & ~7)); - nunits = p -> mh_index; - ASSERT (p -> mh_alloc == ISALLOC); -#ifdef rcheck - ASSERT (p -> mh_magic4 == MAGIC4); - { - register char *m = mem + (tocopy = p -> mh_nbytes); - ASSERT (*m++ == MAGIC1); ASSERT (*m++ == MAGIC1); - ASSERT (*m++ == MAGIC1); ASSERT (*m == MAGIC1); - } -#else /* not rcheck */ - if (p -> mh_index >= 13) - tocopy = (1 << (p -> mh_index + 3)) - ((sizeof *p + 7) & ~7); - else - tocopy = p -> mh_size; -#endif /* not rcheck */ - - /* See if desired size rounds to same power of 2 as actual size. */ - nbytes = (n + ((sizeof *p + 7) & ~7) + EXTRA + 7) & ~7; - - /* If ok, use the same block, just marking its size as changed. */ - if (nbytes > (4 << nunits) && nbytes <= (8 << nunits)) - { -#ifdef rcheck - register char *m = mem + tocopy; - *m++ = 0; *m++ = 0; *m++ = 0; *m++ = 0; - p-> mh_nbytes = n; - m = mem + n; - *m++ = MAGIC1; *m++ = MAGIC1; *m++ = MAGIC1; *m++ = MAGIC1; -#else /* not rcheck */ - p -> mh_size = n; -#endif /* not rcheck */ - return mem; - } - - if (n < tocopy) - tocopy = n; - { - register char *new; - - if ((new = malloc (n)) == 0) - return 0; - bcopy (mem, new, tocopy); - free (mem); - return new; - } -} - -/* This is in case something linked with Emacs calls calloc. 
*/ - -char * -calloc (num, size) - unsigned num, size; -{ - register char *mem; - - num *= size; - mem = malloc (num); - if (mem != 0) - bzero (mem, num); - return mem; -} - -#ifndef VMS - -char * -memalign (alignment, size) - unsigned alignment, size; -{ - register char *ptr = malloc (size + alignment); - register char *aligned; - register struct mhead *p; - - if (ptr == 0) - return 0; - /* If entire block has the desired alignment, just accept it. */ - if (((int) ptr & (alignment - 1)) == 0) - return ptr; - /* Otherwise, get address of byte in the block that has that alignment. */ - aligned = (char *) (((int) ptr + alignment - 1) & -alignment); - - /* Store a suitable indication of how to free the block, - so that free can find the true beginning of it. */ - p = (struct mhead *) (aligned - ((7 + sizeof (struct mhead)) & ~7)); - p -> mh_size = aligned - ptr; - p -> mh_alloc = ISMEMALIGN; - return aligned; -} - -#ifndef HPUX -/* This runs into trouble with getpagesize on HPUX. - Patching out seems cleaner than the ugly fix needed. */ -char * -valloc (size) -{ - return memalign (getpagesize (), size); -} -#endif /* not HPUX */ -#endif /* not VMS */ - -#ifdef MSTATS -/* Return statistics describing allocation of blocks of size 2**n. 
*/ - -struct mstats_value - { - int blocksize; - int nfree; - int nused; - }; - -struct mstats_value -malloc_stats (size) - int size; -{ - struct mstats_value v; - register int i; - register struct mhead *p; - - v.nfree = 0; - - if (size < 0 || size >= 30) - { - v.blocksize = 0; - v.nused = 0; - return v; - } - - v.blocksize = 1 << (size + 3); - v.nused = nmalloc[size]; - - for (p = nextf[size]; p; p = CHAIN (p)) - v.nfree++; - - return v; -} -int -malloc_mem_used () -{ - int i; - int size_used; - - size_used = 0; - - for (i = 0; i < 30; i++) - { - int allocation_size = 1 << (i + 3); - struct mhead *p; - - size_used += nmalloc[i] * allocation_size; - } - - return size_used; -} - -int -malloc_mem_free () -{ - int i; - int size_unused; - - size_unused = 0; - - for (i = 0; i < 30; i++) - { - int allocation_size = 1 << (i + 3); - struct mhead *p; - - for (p = nextf[i]; p ; p = CHAIN (p)) - size_unused += allocation_size; - } - - return size_unused; -} -#endif /* MSTATS */ - -/* - * This function returns the total number of bytes that the process - * will be allowed to allocate via the sbrk(2) system call. On - * BSD systems this is the total space allocatable to stack and - * data. On USG systems this is the data space only. - */ - -#ifdef USG - -get_lim_data () -{ - extern long ulimit (); - -#ifdef ULIMIT_BREAK_VALUE - lim_data = ULIMIT_BREAK_VALUE; -#else - lim_data = ulimit (3, 0); -#endif - - lim_data -= (long) data_space_start; -} - -#else /* not USG */ -#if defined (BSD4_1) || defined (VMS) - -get_lim_data () -{ - lim_data = vlimit (LIM_DATA, -1); -} - -#else /* not BSD4_1 and not VMS */ - -get_lim_data () -{ - struct rlimit XXrlimit; - - getrlimit (RLIMIT_DATA, &XXrlimit); -#ifdef RLIM_INFINITY - lim_data = XXrlimit.rlim_cur & RLIM_INFINITY; /* soft limit */ -#else - lim_data = XXrlimit.rlim_cur; /* soft limit */ -#endif -} - -#endif /* not BSD4_1 and not VMS */ -#endif /* not USG */ - -#ifdef VMS -/* There is a problem when dumping and restoring things on VMS. 
Calls - * to SBRK don't necessarily result in contiguous allocation. Dumping - * doesn't work when it isn't. Therefore, we make the initial - * allocation contiguous by allocating a big chunk, and do SBRKs from - * there. Once Emacs has dumped there is no reason to continue - * contiguous allocation, malloc doesn't depend on it. - * - * There is a further problem of using brk and sbrk while using VMS C - * run time library routines malloc, calloc, etc. The documentation - * says that this is a no-no, although I'm not sure why this would be - * a problem. In any case, we remove the necessity to call brk and - * sbrk, by calling calloc (to assure zero filled data) rather than - * sbrk. - * - * VMS_ALLOCATION_SIZE is the size of the allocation array. This - * should be larger than the malloc size before dumping. Making this - * too large will result in the startup procedure slowing down since - * it will require more space and time to map it in. - * - * The value for VMS_ALLOCATION_SIZE in the following define was determined - * by running emacs linked (and a large allocation) with the debugger and - * looking to see how much storage was used. The allocation was 201 pages, - * so I rounded it up to a power of two. - */ -#ifndef VMS_ALLOCATION_SIZE -#define VMS_ALLOCATION_SIZE (512*256) -#endif - -/* Use VMS RTL definitions */ -#undef sbrk -#undef brk -#undef malloc -int vms_out_initial = 0; -char vms_initial_buffer[VMS_ALLOCATION_SIZE]; -static char *vms_current_brk = &vms_initial_buffer; -static char *vms_end_brk = &vms_initial_buffer[VMS_ALLOCATION_SIZE-1]; - -#include <stdio.h> - -char * -sys_sbrk (incr) - int incr; -{ - char *sbrk(), *temp, *ptr; - - if (vms_out_initial) - { - /* out of initial allocation... 
*/ - if (!(temp = malloc (incr))) - temp = (char *) -1; - } - else - { - /* otherwise, go out of our area */ - ptr = vms_current_brk + incr; /* new current_brk */ - if (ptr <= vms_end_brk) - { - temp = vms_current_brk; - vms_current_brk = ptr; - } - else - { - vms_out_initial = 1; /* mark as out of initial allocation */ - if (!(temp = malloc (incr))) - temp = (char *) -1; - } - } - return temp; -} -#endif /* VMS */ diff --git a/regex-0.12/test/fileregex.c b/regex-0.12/test/fileregex.c @@ -1,77 +0,0 @@ -#include <sys/types.h> -#include <stdio.h> -#include "regex.h" - -#define BYTEWIDTH 8 - -/* Sorry, but this is just a test program. */ -#define LINE_MAX 500 - -int -main (argc, argv) - int argc; - char *argv[]; -{ - FILE *f; - char *filename; - char pat[500]; /* Sorry for that maximum size, too. */ - char line[LINE_MAX]; - struct re_pattern_buffer buf; - char fastmap[(1 << BYTEWIDTH)]; - const char *compile_ret; - unsigned lineno = 1; - unsigned nfound = 0; - - /* Actually, it might be useful to allow the data file to be standard - input, and to specify the pattern on the command line. 
*/ - if (argc != 2) - { - fprintf (stderr, "Usage: %s <filename>.\n", argv[0]); - exit (1); - } - - filename = argv[1]; - f = fopen (filename, "r"); - if (f == NULL) - perror (filename); - - buf.allocated = 0; - buf.buffer = NULL; - buf.fastmap = fastmap; - - printf ("Pattern = ", pat); - gets (pat); - - if (feof (stdin)) - { - putchar ('\n'); - exit (0); - } - - compile_ret = re_compile_pattern (pat, strlen (pat), &buf); - if (compile_ret != NULL) - { - fprintf (stderr, "%s: %s\n", pat, compile_ret); - exit (1); - } - - while (fgets (line, LINE_MAX, f) != NULL) - { - size_t len = strlen (line); - struct re_registers regs; - int search_ret - = re_search_2 (&buf, NULL, 0, line, len, 0, len, &regs, len); - - if (search_ret == -2) - { - fprintf (stderr, "%s:%d: re_search failed.\n", filename, lineno); - exit (1); - } - - nfound += search_ret != -1; - lineno++; - } - - printf ("Matches found: %u (out of %u lines).\n", nfound, lineno - 1); - return 0; -} diff --git a/regex-0.12/test/g++malloc.c b/regex-0.12/test/g++malloc.c @@ -1,1288 +0,0 @@ -#define inline - -/* -Copyright (C) 1989 Free Software Foundation - written by Doug Lea (dl@oswego.edu) - -This file is part of GNU CC. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY. No author or distributor -accepts responsibility to anyone for the consequences of using it -or for whether it serves any particular purpose or works at all, -unless he says so in writing. Refer to the GNU CC General Public -License for full details. - -Everyone is granted permission to copy, modify and redistribute -GNU CC, but only under the conditions described in the -GNU CC General Public License. A copy of this license is -supposed to have been given to you along with GNU CC so you -can know your rights and responsibilities. It should be in a -file named COPYING. Among other things, the copyright notice -and this notice must be preserved on all copies. 
-*/ - - - -#ifndef NO_LIBGXX_MALLOC /* ignore whole file otherwise */ - -/* compile with -DMALLOC_STATS to collect statistics */ -/* collecting statistics slows down malloc by at least 15% */ - -#ifdef MALLOC_STATS -#define UPDATE_STATS(ARGS) {ARGS;} -#else -#define UPDATE_STATS(ARGS) -#endif - -/* History - - - Tue Jan 16 04:54:27 1990 Doug Lea (dl at g.oswego.edu) - - version 1 released in libg++ - - Sun Jan 21 05:52:47 1990 Doug Lea (dl at g.oswego.edu) - - bins are now own struct for, sanity. - - new victim search strategy: scan up and consolidate. - Both faster and less fragmentation. - - refined when to scan bins for consolidation, via consollink, etc. - - realloc: always try to expand chunk, avoiding some fragmentation. - - changed a few inlines into macros - - hardwired SBRK_UNIT to 4096 for uniformity across systems - - Tue Mar 20 14:18:23 1990 Doug Lea (dl at g.oswego.edu) - - calloc and cfree now correctly parameterized. - - Sun Apr 1 10:00:48 1990 Doug Lea (dl at g.oswego.edu) - - added memalign and valloc. - - Sun Jun 24 05:46:48 1990 Doug Lea (dl at g.oswego.edu) - - #include gepagesize.h only ifndef sun - cache pagesize after first call - - Wed Jul 25 08:35:19 1990 Doug Lea (dl at g.oswego.edu) - - No longer rely on a `designated victim': - - 1. It sometimes caused splits of large chunks - when smaller ones would do, leading to - bad worst-case fragmentation. - - 2. Scanning through the av array fast anyway, - so the overhead isn't worth it. - - To compensate, several other minor changes: - - 1. Unusable chunks are checked for consolidation during - searches inside bins, better distributing chunks - across bins. - - 2. Chunks are returned when found in malloc_find_space, - rather than finishing cleaning everything up, to - avoid wasted iterations due to (1). -*/ - -/* - A version of malloc/free/realloc tuned for C++ applications. 
- - Here's what you probably want to know first: - - In various tests, this appears to be about as fast as, - and usually substantially less memory-wasteful than BSD/GNUemacs malloc. - - Generally, it is slower (by perhaps 20%) than bsd-style malloc - only when bsd malloc would waste a great deal of space in - fragmented blocks, which this malloc recovers; or when, by - chance or design, nearly all requests are near the bsd malloc - power-of-2 allocation bin boundaries, and as many chunks are - used as are allocated. - - It uses more space than bsd malloc only when, again by chance - or design, only bsdmalloc bin-sized requests are malloced, or when - little dynamic space is malloced, since this malloc may grab larger - chunks from the system at a time than bsd. - - In other words, this malloc seems generally superior to bsd - except perhaps for programs that are specially tuned to - deal with bsdmalloc's characteristics. But even here, the - performance differences are slight. - - - This malloc, like any other, is a compromised design. - - - Chunks of memory are maintained using a `boundary tag' method as - described in e.g., Knuth or Standish. This means that the size of - the chunk is stored both in the front of the chunk and at the end. - This makes consolidating fragmented chunks into bigger chunks very fast. - The size field is also used to hold bits representing whether a - chunk is free or in use. - - Malloced chunks have space overhead of 8 bytes: The preceding - and trailing size fields. When they are freed, the list pointer - fields are also needed. - - Available chunks are kept in doubly linked lists. The lists are - maintained in an array of bins using a power-of-two method, except - that instead of 32 bins (one for each 1 << i), there are 128: each - power of two is split in quarters. The use of very fine bin sizes - closely approximates the use of one bin per actually used size, - without necessitating the overhead of locating such bins. 
It is - especially desirable in common C++ applications where large numbers - of identically-sized blocks are malloced/freed in some dynamic - manner, and then later are all freed. The finer bin sizes make - finding blocks fast, with little wasted overallocation. The - consolidation methods ensure that once the collection of blocks is - no longer useful, fragments are gathered into bigger chunks awaiting new - roles. - - The bins av[i] serve as heads of the lists. Bins contain a dummy - header for the chunk lists, and a `dirty' field used to indicate - whether the list may need to be scanned for consolidation. - - On allocation, the bin corresponding to the request size is - scanned, and if there is a chunk with size >= requested, it - is split, if too big, and used. Chunks on the list which are - too small are examined for consolidation during this traversal. - - If no chunk exists in the list bigger bins are scanned in search of - a victim. - - If no victim can be found, then smaller bins are examined for - consolidation in order to construct a victim. - - Finally, if consolidation fails to come up with a usable chunk, - more space is obtained from the system. - - After a split, the remainder is placed on - the back of the appropriate bin list. (All freed chunks are placed - on fronts of lists. All remaindered or consolidated chunks are - placed on the rear. Correspondingly, searching within a bin - starts at the front, but finding victims is from the back. All - of this approximates the effect of having 2 kinds of lists per - bin: returned chunks vs unallocated chunks, but without the overhead - of maintaining 2 lists.) - - Deallocation (free) consists only of placing the chunk on - a list. - - Reallocation proceeds in the usual way. If a chunk can be extended, - it is, else a malloc-copy-free sequence is taken. 
- - memalign requests more than enough space from malloc, finds a - spot within that chunk that meets the alignment request, and - then possibly frees the leading and trailing space. Overreliance - on memalign is a sure way to fragment space. - - - Some other implementation matters: - - 8 byte alignment is currently hardwired into the design. Calling - memalign will return a chunk that is both 8-byte aligned, and - meets the requested alignment. - - The basic overhead of a used chunk is 8 bytes: 4 at the front and - 4 at the end. - - When a chunk is free, 8 additional bytes are needed for free list - pointers. Thus, the minimum allocatable size is 16 bytes. - - The existence of front and back overhead permits some reasonably - effective fence-bashing checks: The front and back fields must - be identical. This is checked only within free() and realloc(). - The checks are fast enough to be made non-optional. - - The overwriting of parts of freed memory with the freelist pointers - can also be very effective (albeit in an annoying way) in helping - users track down dangling pointers. - - User overwriting of freed space will often result in crashes - within malloc or free. - - These routines are also tuned to C++ in that free(0) is a noop and - a failed malloc automatically calls (*new_handler)(). - - malloc(0) returns a pointer to something of the minimum allocatable size. - - Additional memory is gathered from the system (via sbrk) in a - way that allows chunks obtained across different sbrk calls to - be consolidated, but does not require contiguous memory: Thus, - it should be safe to intersperse mallocs with other sbrk calls. - - This malloc is NOT designed to work in multiprocessing applications. - No semaphores or other concurrency control are provided to ensure - that multiple malloc or free calls don't run at the same time, - which could be disasterous. - - VERY heavy use of inlines is made, for clarity. 
If this malloc - is ported via a compiler without inlining capabilities, all - inlines should be transformed into macros -- making them non-inline - makes malloc at least twice as slow. - - -*/ - - -/* preliminaries */ - -#ifdef __cplusplus -#include <stdio.h> -#else -#include "//usr/include/stdio.h" /* needed for error reporting */ -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef USG -extern void* memset(void*, int, int); -extern void* memcpy(void*, const void*, int); -/*inline void bzero(void* s, int l) { memset(s, 0, l); }*/ -#else -/*extern void bzero(void*, unsigned int);*/ -#endif - -/*extern void bcopy(void*, void*, unsigned int);*/ - -extern void* sbrk(unsigned int); - -/* Put this in instead of commmented out stuff above. */ -#define bcopy(s,d,n) memcpy((d),(s),(n)) -#define bcmp(s1,s2,n) memcmp((s1),(s2),(n)) -#define bzero(s,n) memset((s),0,(n)) - - -#ifdef __GNUC__ -extern volatile void abort(); -#else -extern void abort(); -#endif - -#ifdef __cplusplus -}; /* end of extern "C" */ -#endif - - -/* A good multiple to call sbrk with */ - -#define SBRK_UNIT 4096 - - - -/* how to die on detected error */ - -#ifdef __GNUC__ -static volatile void malloc_user_error() -#else -static void malloc_user_error() -#endif -{ - fputs("malloc/free/realloc: clobbered space detected\n", stderr); abort(); -} - - - -/* Basic overhead for each malloc'ed chunk */ - - -struct malloc_chunk -{ - unsigned int size; /* Size in bytes, including overhead. */ - /* Or'ed with INUSE if in use. */ - - struct malloc_chunk* fd; /* double links -- used only if free. 
*/ - struct malloc_chunk* bk; - -}; - -typedef struct malloc_chunk* mchunkptr; - -struct malloc_bin -{ - struct malloc_chunk hd; /* dummy list header */ - unsigned int dirty; /* True if maybe consolidatable */ - /* Wasting a word here makes */ - /* sizeof(bin) a power of 2, */ - /* which makes size2bin() faster */ -}; - -typedef struct malloc_bin* mbinptr; - - -/* sizes, alignments */ - - -#define SIZE_SZ (sizeof(unsigned int)) -#define MALLOC_MIN_OVERHEAD (SIZE_SZ + SIZE_SZ) -#define MALLOC_ALIGN_MASK (MALLOC_MIN_OVERHEAD - 1) - -#define MINSIZE (sizeof(struct malloc_chunk) + SIZE_SZ) /* MUST == 16! */ - - -/* pad request bytes into a usable size */ - -static inline unsigned int request2size(unsigned int request) -{ - return (request == 0) ? MINSIZE : - ((request + MALLOC_MIN_OVERHEAD + MALLOC_ALIGN_MASK) - & ~(MALLOC_ALIGN_MASK)); -} - - -static inline int aligned_OK(void* m) -{ - return ((unsigned int)(m) & (MALLOC_ALIGN_MASK)) == 0; -} - - -/* size field or'd with INUSE when in use */ -#define INUSE 0x1 - - - -/* the bins, initialized to have null double linked lists */ - -#define MAXBIN 120 /* 1 more than needed for 32 bit addresses */ - -#define FIRSTBIN (&(av[0])) - -static struct malloc_bin av[MAXBIN] = -{ - { { 0, &(av[0].hd), &(av[0].hd) }, 0 }, - { { 0, &(av[1].hd), &(av[1].hd) }, 0 }, - { { 0, &(av[2].hd), &(av[2].hd) }, 0 }, - { { 0, &(av[3].hd), &(av[3].hd) }, 0 }, - { { 0, &(av[4].hd), &(av[4].hd) }, 0 }, - { { 0, &(av[5].hd), &(av[5].hd) }, 0 }, - { { 0, &(av[6].hd), &(av[6].hd) }, 0 }, - { { 0, &(av[7].hd), &(av[7].hd) }, 0 }, - { { 0, &(av[8].hd), &(av[8].hd) }, 0 }, - { { 0, &(av[9].hd), &(av[9].hd) }, 0 }, - - { { 0, &(av[10].hd), &(av[10].hd) }, 0 }, - { { 0, &(av[11].hd), &(av[11].hd) }, 0 }, - { { 0, &(av[12].hd), &(av[12].hd) }, 0 }, - { { 0, &(av[13].hd), &(av[13].hd) }, 0 }, - { { 0, &(av[14].hd), &(av[14].hd) }, 0 }, - { { 0, &(av[15].hd), &(av[15].hd) }, 0 }, - { { 0, &(av[16].hd), &(av[16].hd) }, 0 }, - { { 0, &(av[17].hd), &(av[17].hd) 
}, 0 }, - { { 0, &(av[18].hd), &(av[18].hd) }, 0 }, - { { 0, &(av[19].hd), &(av[19].hd) }, 0 }, - - { { 0, &(av[20].hd), &(av[20].hd) }, 0 }, - { { 0, &(av[21].hd), &(av[21].hd) }, 0 }, - { { 0, &(av[22].hd), &(av[22].hd) }, 0 }, - { { 0, &(av[23].hd), &(av[23].hd) }, 0 }, - { { 0, &(av[24].hd), &(av[24].hd) }, 0 }, - { { 0, &(av[25].hd), &(av[25].hd) }, 0 }, - { { 0, &(av[26].hd), &(av[26].hd) }, 0 }, - { { 0, &(av[27].hd), &(av[27].hd) }, 0 }, - { { 0, &(av[28].hd), &(av[28].hd) }, 0 }, - { { 0, &(av[29].hd), &(av[29].hd) }, 0 }, - - { { 0, &(av[30].hd), &(av[30].hd) }, 0 }, - { { 0, &(av[31].hd), &(av[31].hd) }, 0 }, - { { 0, &(av[32].hd), &(av[32].hd) }, 0 }, - { { 0, &(av[33].hd), &(av[33].hd) }, 0 }, - { { 0, &(av[34].hd), &(av[34].hd) }, 0 }, - { { 0, &(av[35].hd), &(av[35].hd) }, 0 }, - { { 0, &(av[36].hd), &(av[36].hd) }, 0 }, - { { 0, &(av[37].hd), &(av[37].hd) }, 0 }, - { { 0, &(av[38].hd), &(av[38].hd) }, 0 }, - { { 0, &(av[39].hd), &(av[39].hd) }, 0 }, - - { { 0, &(av[40].hd), &(av[40].hd) }, 0 }, - { { 0, &(av[41].hd), &(av[41].hd) }, 0 }, - { { 0, &(av[42].hd), &(av[42].hd) }, 0 }, - { { 0, &(av[43].hd), &(av[43].hd) }, 0 }, - { { 0, &(av[44].hd), &(av[44].hd) }, 0 }, - { { 0, &(av[45].hd), &(av[45].hd) }, 0 }, - { { 0, &(av[46].hd), &(av[46].hd) }, 0 }, - { { 0, &(av[47].hd), &(av[47].hd) }, 0 }, - { { 0, &(av[48].hd), &(av[48].hd) }, 0 }, - { { 0, &(av[49].hd), &(av[49].hd) }, 0 }, - - { { 0, &(av[50].hd), &(av[50].hd) }, 0 }, - { { 0, &(av[51].hd), &(av[51].hd) }, 0 }, - { { 0, &(av[52].hd), &(av[52].hd) }, 0 }, - { { 0, &(av[53].hd), &(av[53].hd) }, 0 }, - { { 0, &(av[54].hd), &(av[54].hd) }, 0 }, - { { 0, &(av[55].hd), &(av[55].hd) }, 0 }, - { { 0, &(av[56].hd), &(av[56].hd) }, 0 }, - { { 0, &(av[57].hd), &(av[57].hd) }, 0 }, - { { 0, &(av[58].hd), &(av[58].hd) }, 0 }, - { { 0, &(av[59].hd), &(av[59].hd) }, 0 }, - - { { 0, &(av[60].hd), &(av[60].hd) }, 0 }, - { { 0, &(av[61].hd), &(av[61].hd) }, 0 }, - { { 0, &(av[62].hd), &(av[62].hd) }, 0 }, - 
{ { 0, &(av[63].hd), &(av[63].hd) }, 0 }, - { { 0, &(av[64].hd), &(av[64].hd) }, 0 }, - { { 0, &(av[65].hd), &(av[65].hd) }, 0 }, - { { 0, &(av[66].hd), &(av[66].hd) }, 0 }, - { { 0, &(av[67].hd), &(av[67].hd) }, 0 }, - { { 0, &(av[68].hd), &(av[68].hd) }, 0 }, - { { 0, &(av[69].hd), &(av[69].hd) }, 0 }, - - { { 0, &(av[70].hd), &(av[70].hd) }, 0 }, - { { 0, &(av[71].hd), &(av[71].hd) }, 0 }, - { { 0, &(av[72].hd), &(av[72].hd) }, 0 }, - { { 0, &(av[73].hd), &(av[73].hd) }, 0 }, - { { 0, &(av[74].hd), &(av[74].hd) }, 0 }, - { { 0, &(av[75].hd), &(av[75].hd) }, 0 }, - { { 0, &(av[76].hd), &(av[76].hd) }, 0 }, - { { 0, &(av[77].hd), &(av[77].hd) }, 0 }, - { { 0, &(av[78].hd), &(av[78].hd) }, 0 }, - { { 0, &(av[79].hd), &(av[79].hd) }, 0 }, - - { { 0, &(av[80].hd), &(av[80].hd) }, 0 }, - { { 0, &(av[81].hd), &(av[81].hd) }, 0 }, - { { 0, &(av[82].hd), &(av[82].hd) }, 0 }, - { { 0, &(av[83].hd), &(av[83].hd) }, 0 }, - { { 0, &(av[84].hd), &(av[84].hd) }, 0 }, - { { 0, &(av[85].hd), &(av[85].hd) }, 0 }, - { { 0, &(av[86].hd), &(av[86].hd) }, 0 }, - { { 0, &(av[87].hd), &(av[87].hd) }, 0 }, - { { 0, &(av[88].hd), &(av[88].hd) }, 0 }, - { { 0, &(av[89].hd), &(av[89].hd) }, 0 }, - - { { 0, &(av[90].hd), &(av[90].hd) }, 0 }, - { { 0, &(av[91].hd), &(av[91].hd) }, 0 }, - { { 0, &(av[92].hd), &(av[92].hd) }, 0 }, - { { 0, &(av[93].hd), &(av[93].hd) }, 0 }, - { { 0, &(av[94].hd), &(av[94].hd) }, 0 }, - { { 0, &(av[95].hd), &(av[95].hd) }, 0 }, - { { 0, &(av[96].hd), &(av[96].hd) }, 0 }, - { { 0, &(av[97].hd), &(av[97].hd) }, 0 }, - { { 0, &(av[98].hd), &(av[98].hd) }, 0 }, - { { 0, &(av[99].hd), &(av[99].hd) }, 0 }, - - { { 0, &(av[100].hd), &(av[100].hd) }, 0 }, - { { 0, &(av[101].hd), &(av[101].hd) }, 0 }, - { { 0, &(av[102].hd), &(av[102].hd) }, 0 }, - { { 0, &(av[103].hd), &(av[103].hd) }, 0 }, - { { 0, &(av[104].hd), &(av[104].hd) }, 0 }, - { { 0, &(av[105].hd), &(av[105].hd) }, 0 }, - { { 0, &(av[106].hd), &(av[106].hd) }, 0 }, - { { 0, &(av[107].hd), &(av[107].hd) }, 0 
}, - { { 0, &(av[108].hd), &(av[108].hd) }, 0 }, - { { 0, &(av[109].hd), &(av[109].hd) }, 0 }, - - { { 0, &(av[110].hd), &(av[110].hd) }, 0 }, - { { 0, &(av[111].hd), &(av[111].hd) }, 0 }, - { { 0, &(av[112].hd), &(av[112].hd) }, 0 }, - { { 0, &(av[113].hd), &(av[113].hd) }, 0 }, - { { 0, &(av[114].hd), &(av[114].hd) }, 0 }, - { { 0, &(av[115].hd), &(av[115].hd) }, 0 }, - { { 0, &(av[116].hd), &(av[116].hd) }, 0 }, - { { 0, &(av[117].hd), &(av[117].hd) }, 0 }, - { { 0, &(av[118].hd), &(av[118].hd) }, 0 }, - { { 0, &(av[119].hd), &(av[119].hd) }, 0 } -}; - -/* - indexing into bins -*/ - -static inline mbinptr size2bin(unsigned int sz) -{ - mbinptr b = av; - while (sz >= (MINSIZE * 2)) { b += 4; sz >>= 1; } /* find power of 2 */ - b += (sz - MINSIZE) >> 2; /* find quadrant */ - return b; -} - - - -/* counts maintained if MALLOC_STATS defined */ - -#ifdef MALLOC_STATS - -static unsigned int sbrked_mem; -static unsigned int requested_mem; -static unsigned int malloced_mem; -static unsigned int freed_mem; -static unsigned int max_used_mem; - -static unsigned int n_sbrks; -static unsigned int n_mallocs; -static unsigned int n_frees; -static unsigned int n_reallocs; -static unsigned int n_reallocs_with_copy; -static unsigned int n_avail; -static unsigned int max_inuse; - -static unsigned int n_malloc_chunks; -static unsigned int n_malloc_bins; - -static unsigned int n_split; -static unsigned int n_consol; - - -static void do_malloc_stats(const mchunkptr p) -{ - ++n_mallocs; - if ((n_mallocs-n_frees) > max_inuse) - max_inuse = n_mallocs - n_frees; - malloced_mem += (p->size & ~(INUSE)); - if (malloced_mem - freed_mem > max_used_mem) - max_used_mem = malloced_mem - freed_mem; -} - -static void do_free_stats(const mchunkptr p) -{ - ++n_frees; - freed_mem += (p->size & ~(INUSE)); -} - -#endif - - - -/* Utilities needed below for memalign */ -/* This is redundant with libg++ support, but not if used stand-alone */ - -static unsigned int gcd(unsigned int a, unsigned int b) -{ - 
unsigned int tmp; - - if (b > a) - { - tmp = a; a = b; b = tmp; - } - for(;;) - { - if (b == 0) - return a; - else if (b == 1) - return b; - else - { - tmp = b; - b = a % b; - a = tmp; - } - } -} - -static inline unsigned int lcm(unsigned int x, unsigned int y) -{ - return x / gcd(x, y) * y; -} - - - -/* maintaining INUSE via size field */ - - -#define inuse(p) ((p)->size & INUSE) -#define set_inuse(p) ((p)->size |= INUSE) -#define clear_inuse(b) ((p)->size &= ~INUSE) - - -/* operations on malloc_chunk addresses */ - - -/* return ptr to next physical malloc_chunk */ - -#define next_chunk(p) ((mchunkptr)((char*)(p) + (p)->size)) - -/* return ptr to previous physical malloc_chunk */ - -#define prev_chunk(p) ((mchunkptr)((char*)(p)-((((int*)(p))[-1]) & ~(INUSE)))) - -/* place size at front and back of chunk */ - - -static inline void set_size(mchunkptr p, unsigned int sz) -{ - p->size = *((int*)((char*)(p) + sz - SIZE_SZ)) = sz; -} - - - - -/* conversion from malloc headers to user pointers, and back */ - -static inline void* chunk2mem(mchunkptr p) -{ - void *mem; - set_inuse(p); -mem = (void*)((char*)(p) + SIZE_SZ); - return mem; -} - -/* xxxx my own */ -mchunkptr sanity_check(void* mem) -{ - mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ); - - /* a quick sanity check */ - unsigned int sz = p->size & ~(INUSE); - if (p->size == sz || sz != *((int*)((char*)(p) + sz - SIZE_SZ))) - malloc_user_error(); - - return p; -} - - - - -static inline mchunkptr mem2chunk(void* mem) -{ - mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ); - - /* a quick sanity check */ - unsigned int sz = p->size & ~(INUSE); - if (p->size == sz || sz != *((int*)((char*)(p) + sz - SIZE_SZ))) - malloc_user_error(); - - p->size = sz; /* clears INUSE */ - return p; -} - - - -/* maintaining bins & pointers */ - - -/* maximum bin actually used */ - -static mbinptr malloc_maxbin = FIRSTBIN; - - -/* operations on lists inside bins */ - - -/* take a chunk off a list */ - -static inline void 
unlink(mchunkptr p) -{ - mchunkptr b = p->bk; - mchunkptr f = p->fd; - - f->bk = b; b->fd = f; - - UPDATE_STATS (--n_avail); -} - - - -/* split a chunk and place on the back of a list */ - -static inline void split(mchunkptr p, unsigned int offset) -{ - unsigned int room = p->size - offset; - if (room >= MINSIZE) - { - mbinptr bn = size2bin(room); /* new bin */ - mchunkptr h = &(bn->hd); /* its head */ - mchunkptr b = h->bk; /* old back element */ - mchunkptr t = (mchunkptr)((char*)(p) + offset); /* remaindered chunk */ - - /* set size */ - t->size = *((int*)((char*)(t) + room - SIZE_SZ)) = room; - - /* link up */ - t->bk = b; t->fd = h; h->bk = b->fd = t; - - /* adjust maxbin (h == b means was empty) */ - if (h == b && bn > malloc_maxbin) malloc_maxbin = bn; - - /* adjust size of chunk to be returned */ - p->size = *((int*)((char*)(p) + offset - SIZE_SZ)) = offset; - - UPDATE_STATS ((++n_split, ++n_avail)); - } -} - - - -/* place a consolidated chunk on the back of a list */ -/* like above, except no split */ - -static inline void consollink(mchunkptr p) -{ - mbinptr bn = size2bin(p->size); - mchunkptr h = &(bn->hd); - mchunkptr b = h->bk; - - p->bk = b; p->fd = h; h->bk = b->fd = p; - - if (h == b && bn > malloc_maxbin) malloc_maxbin = bn; - - UPDATE_STATS(++n_avail); -} - - -/* place a freed chunk on the front of a list */ - -static inline void frontlink(mchunkptr p) -{ - mbinptr bn = size2bin(p->size); - mchunkptr h = &(bn->hd); - mchunkptr f = h->fd; - - p->bk = h; p->fd = f; f->bk = h->fd = p; - - if (h == f && bn > malloc_maxbin) malloc_maxbin = bn; - - bn->dirty = 1; - - UPDATE_STATS(++n_avail); -} - - - -/* Dealing with sbrk */ - - -/* To link consecutive sbrk regions when possible */ - -static int* last_sbrk_end; - - -/* who to call when sbrk returns failure */ - -#ifndef NO_NEW_HANDLER -typedef volatile void (*vfp)(); -#ifdef __cplusplus -extern "C" vfp __new_handler; -#else -extern vfp __new_handler; -#endif -#endif - -static mchunkptr 
malloc_from_sys(unsigned nb) -{ - mchunkptr p; - unsigned int sbrk_size; - int* ip; - - /* Minimally, we need to pad with enough space */ - /* to place dummy size/use fields to ends if needed */ - - sbrk_size = ((nb + SBRK_UNIT - 1 + SIZE_SZ + SIZE_SZ) - / SBRK_UNIT) * SBRK_UNIT; - - ip = (int*)(sbrk(sbrk_size)); - if ((char*)ip == (char*)(-1)) /* sbrk returns -1 on failure */ - { -#ifndef NO_NEW_HANDLER - (*__new_handler) (); -#endif - return 0; - } - - UPDATE_STATS ((++n_sbrks, sbrked_mem += sbrk_size)); - - - if (last_sbrk_end != &ip[-1]) - { - /* It's either first time through or someone else called sbrk. */ - /* Arrange end-markers at front & back */ - - /* Shouldn't be necessary, but better to be safe */ - while (!aligned_OK(ip)) { ++ip; sbrk_size -= SIZE_SZ; } - - - /* Mark the front as in use to prevent merging. */ - /* Note we can get away with only 1 word, not MINSIZE overhead here */ - - *ip++ = SIZE_SZ | INUSE; - - p = (mchunkptr)ip; - set_size(p,sbrk_size - (SIZE_SZ + SIZE_SZ)); - - } - else - { - mchunkptr l; - - /* We can safely make the header start at end of prev sbrked chunk. */ - /* We will still have space left at the end from a previous call */ - /* to place the end marker, below */ - - p = (mchunkptr)(last_sbrk_end); - set_size(p, sbrk_size); - - - /* Even better, maybe we can merge with last fragment: */ - - l = prev_chunk(p); - if (!inuse(l)) - { - unlink(l); - set_size(l, p->size + l->size); - p = l; - } - - } - - /* mark the end of sbrked space as in use to prevent merging */ - - last_sbrk_end = (int*)((char*)p + p->size); - *last_sbrk_end = SIZE_SZ | INUSE; - - UPDATE_STATS((++n_avail, ++n_malloc_chunks)); - - /* make it safe to unlink in malloc */ - UPDATE_STATS(++n_avail); - p->fd = p->bk = p; - - return p; -} - - - -/* Consolidate dirty bins. 
*/ -/* Stop if found a chunk big enough to satisfy current malloc request */ - -/* (It requires much less bookkeeping to consolidate entire bins */ -/* at once than to keep records of which chunks might be */ -/* consolidatable. So long as the lists are short, which we */ -/* try to ensure via small bin ranges, there is little wasted effort.) */ - -static mchunkptr malloc_find_space(unsigned int nb) -{ - mbinptr b; - - /* first, re-adjust max used bin */ - - while (malloc_maxbin >= FIRSTBIN && - malloc_maxbin->hd.bk == &(malloc_maxbin->hd)) - { - malloc_maxbin->dirty = 0; - --malloc_maxbin; - } - - for (b = malloc_maxbin; b >= FIRSTBIN; --b) - { - UPDATE_STATS(++n_malloc_bins); - - if (b->dirty) - { - mchunkptr h = &(b->hd); /* head of list */ - mchunkptr p = h->fd; /* chunk traverser */ - - while (p != h) - { - mchunkptr nextp = p->fd; /* save, in case of relinks */ - int consolidated = 0; /* only unlink/relink if consolidated */ - - mchunkptr t; - - UPDATE_STATS(++n_malloc_chunks); - - while (!inuse(t = prev_chunk(p))) /* consolidate backward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(t, t->size + p->size); - p = t; - UPDATE_STATS (++n_consol); - } - - while (!inuse(t = next_chunk(p))) /* consolidate forward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(p, p->size + t->size); - UPDATE_STATS (++n_consol); - } - - if (consolidated) - { - if (p->size >= nb) - { - /* make it safe to unlink in malloc */ - UPDATE_STATS(++n_avail); - p->fd = p->bk = p; - return p; - } - else - consollink(p); - } - - p = nextp; - - } - - b->dirty = 0; - - } - } - - /* nothing available - sbrk some more */ - - return malloc_from_sys(nb); -} - - - -/* Finally, the user-level functions */ - -void* malloc(unsigned int bytes) -{ - unsigned int nb = request2size(bytes); /* padded request size */ - mbinptr b = size2bin(nb); /* corresponding bin */ - 
mchunkptr hd = &(b->hd); /* head of its list */ - mchunkptr p = hd->fd; /* chunk traverser */ - - UPDATE_STATS((requested_mem+=bytes, ++n_malloc_bins)); - - /* Try a (near) exact match in own bin */ - /* clean out unusable but consolidatable chunks in bin while traversing */ - - while (p != hd) - { - UPDATE_STATS(++n_malloc_chunks); - if (p->size >= nb) - goto found; - else /* try to consolidate; same code as malloc_find_space */ - { - mchunkptr nextp = p->fd; /* save, in case of relinks */ - int consolidated = 0; /* only unlink/relink if consolidated */ - - mchunkptr t; - - while (!inuse(t = prev_chunk(p))) /* consolidate backward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(t, t->size + p->size); - p = t; - UPDATE_STATS (++n_consol); - } - - while (!inuse(t = next_chunk(p))) /* consolidate forward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(p, p->size + t->size); - UPDATE_STATS (++n_consol); - } - - if (consolidated) - { - if (p->size >= nb) - { - /* make it safe to unlink again below */ - UPDATE_STATS(++n_avail); - p->fd = p->bk = p; - goto found; - } - else - consollink(p); - } - - p = nextp; - - } - } - - b->dirty = 0; /* true if got here */ - - /* Scan bigger bins for a victim */ - - while (++b <= malloc_maxbin) - { - UPDATE_STATS(++n_malloc_bins); - if ((p = b->hd.bk) != &(b->hd)) /* no need to check size */ - goto found; - } - - /* Consolidate or sbrk */ - - p = malloc_find_space(nb); - - if (p == 0) return 0; /* allocation failure */ - - found: /* Use what we found */ - - unlink(p); - split(p, nb); - UPDATE_STATS(do_malloc_stats(p)); - return chunk2mem(p); -} - - - - -void free(void* mem) -{ - if (mem != 0) - { - mchunkptr p = mem2chunk(mem); - UPDATE_STATS(do_free_stats(p)); - frontlink(p); - } -} - - -void* calloc(unsigned int n, unsigned int elem_size) -{ - unsigned int sz = n * elem_size; - void* p = 
malloc(sz); - bzero(p, sz); - return p; -}; - -/* This is here for compatibility with older systems */ -void cfree(void *mem) -{ - free(mem); -} - - -unsigned int malloc_usable_size(void* mem) -{ - if (mem == 0) - return 0; - else - { - mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ); - unsigned int sz = p->size & ~(INUSE); - if (p->size == sz || sz != *((int*)((char*)(p) + sz - SIZE_SZ))) - return 0; - else - return sz - MALLOC_MIN_OVERHEAD; - } -} - - - -void* realloc(void* mem, unsigned int bytes) -{ - if (mem == 0) - return malloc(bytes); - else - { - unsigned int nb = request2size(bytes); - mchunkptr p = mem2chunk(mem); - unsigned int oldsize = p->size; - int room; - mchunkptr nxt; - - UPDATE_STATS((++n_reallocs, requested_mem += bytes-oldsize)); - - /* try to expand (even if already big enough), to clean up chunk */ - - while (!inuse(nxt = next_chunk(p))) - { - UPDATE_STATS ((malloced_mem += nxt->size, ++n_consol)); - unlink(nxt); - set_size(p, p->size + nxt->size); - } - - room = p->size - nb; - if (room >= 0) - { - split(p, nb); - UPDATE_STATS(malloced_mem -= room); - return chunk2mem(p); - } - else /* do the obvious */ - { - void* newmem; - set_inuse(p); /* don't let malloc consolidate us yet! 
*/ - newmem = malloc(nb); - bcopy(mem, newmem, oldsize - SIZE_SZ); - free(mem); - UPDATE_STATS(++n_reallocs_with_copy); - return newmem; - } - } -} - - - -/* return a pointer to space with at least the alignment requested */ - -void* memalign(unsigned int alignment, unsigned int bytes) -{ - mchunkptr p; - unsigned int nb = request2size(bytes); - - /* find an alignment that both we and the user can live with: */ - /* least common multiple guarantees mutual happiness */ - unsigned int align = lcm(alignment, MALLOC_MIN_OVERHEAD); - unsigned int mask = align - 1; - - /* call malloc with worst case padding to hit alignment; */ - /* we will give back extra */ - - unsigned int req = nb + align + MINSIZE; - void* m = malloc(req); - - if (m == 0) return m; - - p = mem2chunk(m); - - /* keep statistics on track */ - - UPDATE_STATS(--n_mallocs); - UPDATE_STATS(malloced_mem -= p->size); - UPDATE_STATS(requested_mem -= req); - UPDATE_STATS(requested_mem += bytes); - - if (((int)(m) & (mask)) != 0) /* misaligned */ - { - - /* find an aligned spot inside chunk */ - - mchunkptr ap = (mchunkptr)(( ((int)(m) + mask) & -align) - SIZE_SZ); - - unsigned int gap = (unsigned int)(ap) - (unsigned int)(p); - unsigned int room; - - /* we need to give back leading space in a chunk of at least MINSIZE */ - - if (gap < MINSIZE) - { - /* This works since align >= MINSIZE */ - /* and we've malloc'd enough total room */ - - ap = (mchunkptr)( (int)(ap) + align ); - gap += align; - } - - if (gap + nb > p->size) /* can't happen unless chunk sizes corrupted */ - malloc_user_error(); - - room = p->size - gap; - - /* give back leader */ - set_size(p, gap); - consollink(p); - - /* use the rest */ - p = ap; - set_size(p, room); - } - - /* also give back spare room at the end */ - - split(p, nb); - UPDATE_STATS(do_malloc_stats(p)); - return chunk2mem(p); - -} - -#ifndef sun -#include "getpagesize.h" -#endif - -static unsigned int malloc_pagesize = 0; - -void* valloc(unsigned int bytes) -{ - if 
(malloc_pagesize == 0) malloc_pagesize = getpagesize(); - return memalign (malloc_pagesize, bytes); -} - - -void malloc_stats() -{ -#ifndef MALLOC_STATS -} -#else - int i; - mchunkptr p; - double nm = (double)(n_mallocs + n_reallocs); - - fprintf(stderr, "\nmalloc statistics\n\n"); - - if (n_mallocs != 0) - fprintf(stderr, "requests = %10u total size = %10u\tave = %10u\n", - n_mallocs, requested_mem, requested_mem/n_mallocs); - - if (n_mallocs != 0) - fprintf(stderr, "mallocs = %10u total size = %10u\tave = %10u\n", - n_mallocs, malloced_mem, malloced_mem/n_mallocs); - - if (n_frees != 0) - fprintf(stderr, "frees = %10u total size = %10u\tave = %10u\n", - n_frees, freed_mem, freed_mem/n_frees); - - if (n_mallocs-n_frees != 0) - fprintf(stderr, "in use = %10u total size = %10u\tave = %10u\n", - n_mallocs-n_frees, malloced_mem-freed_mem, - (malloced_mem-freed_mem) / (n_mallocs-n_frees)); - - if (max_inuse != 0) - fprintf(stderr, "max in use= %10u total size = %10u\tave = %10u\n", - max_inuse, max_used_mem, max_used_mem / max_inuse); - - if (n_avail != 0) - fprintf(stderr, "available = %10u total size = %10u\tave = %10u\n", - n_avail, sbrked_mem - (malloced_mem-freed_mem), - (sbrked_mem - (malloced_mem-freed_mem)) / n_avail); - - if (n_sbrks != 0) - fprintf(stderr, "sbrks = %10u total size = %10u\tave = %10u\n\n", - n_sbrks, sbrked_mem, sbrked_mem/ n_sbrks); - - if (n_reallocs != 0) - fprintf(stderr, "reallocs = %10u with copy = %10u\n\n", - n_reallocs, n_reallocs_with_copy); - - - if (nm != 0) - { - fprintf(stderr, "chunks scanned per malloc = %6.3f\n", - n_malloc_chunks / nm); - fprintf(stderr, "bins scanned per malloc = %6.3f\n", - n_malloc_bins / nm); - fprintf(stderr, "splits per malloc = %6.3f\n", - n_split / nm); - fprintf(stderr, "consolidations per malloc = %6.3f\n", - n_consol / nm); - } - - fprintf(stderr, "\nfree chunks:\n"); - for (i = 0; i < MAXBIN; ++i) - { - p = av[i].hd.fd; - if (p != &(av[i].hd)) - { - unsigned int count = 1; - unsigned int sz = 
p->size; - for (p = p->fd; p != &(av[i].hd); p = p->fd) - { - if (p->size == sz) - ++count; - else - { - fprintf(stderr, "\tsize = %10u count = %5u\n", sz, count); - count = 1; - sz = p->size; - } - } - - fprintf(stderr, "\tsize = %10u count = %5u\n", sz, count); - - } - } -} -#endif /* MALLOC_STATS */ - -#endif /* NO_LIBGXX_MALLOC */ - - diff --git a/regex-0.12/test/getpagesize.h b/regex-0.12/test/getpagesize.h @@ -1,25 +0,0 @@ -#ifdef BSD -#ifndef BSD4_1 -#define HAVE_GETPAGESIZE -#endif -#endif - -#ifndef HAVE_GETPAGESIZE - -#include <sys/param.h> - -#ifdef EXEC_PAGESIZE -#define getpagesize() EXEC_PAGESIZE -#else -#ifdef NBPG -#define getpagesize() NBPG * CLSIZE -#ifndef CLSIZE -#define CLSIZE 1 -#endif /* no CLSIZE */ -#else /* no NBPG */ -#define getpagesize() NBPC -#endif /* no NBPG */ -#endif /* no EXEC_PAGESIZE */ - -#endif /* not HAVE_GETPAGESIZE */ - diff --git a/regex-0.12/test/iregex.c b/regex-0.12/test/iregex.c @@ -1,164 +0,0 @@ -/* Main program for interactive testing. For maximum output, compile - this and regex.c with -DDEBUG. */ - -#include <stdio.h> -#include <sys/types.h> -#include "regex.h" - -/* Don't bother to guess about <string.h> vs <strings.h>, etc. */ -extern int strlen (); - -#define BYTEWIDTH 8 - -extern void printchar (); -extern char upcase[]; - -static void scanstring (); -static void print_regs (); - -int -main (argc, argv) - int argc; - char **argv; -{ - int i; - struct re_pattern_buffer buf; - char fastmap[(1 << BYTEWIDTH)]; - - /* Allow a command argument to specify the style of syntax. You can - use the `syntax' program to decode integer syntax values. */ - if (argc > 1) - re_set_syntax (atoi (argv[1])); - - buf.allocated = 0; - buf.buffer = NULL; - buf.fastmap = fastmap; - buf.translate = upcase; - - for (;;) - { - char pat[500], str[500]; - struct re_registers regs; - - /* Some C compilers don't like `char pat[500] = ""'. 
*/ - pat[0] = 0; - - printf ("Pattern (%s) = ", pat); - gets (pat); - scanstring (pat); - - if (feof (stdin)) - { - putchar ('\n'); - exit (0); - } - - if (*pat) - { - re_compile_pattern (pat, strlen (pat), &buf); - re_compile_fastmap (&buf); -#ifdef DEBUG - print_compiled_pattern (&buf); -#endif - } - - printf ("String = "); - gets (str); /* Now read the string to match against */ - scanstring (str); - - i = re_match (&buf, str, strlen (str), 0, &regs); - printf ("Match value %d.\t", i); - if (i >= 0) - print_regs (regs); - putchar ('\n'); - - i = re_search (&buf, str, strlen (str), 0, strlen (str), &regs); - printf ("Search value %d.\t", i); - if (i >= 0) - print_regs (regs); - putchar ('\n'); - } - - /* We never get here, but what the heck. */ - return 0; -} - -void -scanstring (s) - char *s; -{ - char *write = s; - - while (*s != '\0') - { - if (*s == '\\') - { - s++; - - switch (*s) - { - case '\0': - break; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - *write = *s++ - '0'; - - if ('0' <= *s && *s <= '9') - { - *write = (*write << 3) + (*s++ - '0'); - if ('0' <= *s && *s <= '9') - *write = (*write << 3) + (*s++ - '0'); - } - write++; - break; - - case 'n': - *write++ = '\n'; - s++; - break; - - case 't': - *write++ = '\t'; - s++; - break; - - default: - *write++ = *s++; - break; - } - } - else - *write++ = *s++; - } - - *write++ = '\0'; -} - -/* Print REGS in human-readable form. */ - -void -print_regs (regs) - struct re_registers regs; -{ - int i, end; - - printf ("Registers: "); - - if (regs.num_regs == 0 || regs.start[0] == -1) - { - printf ("(none)"); - } - else - { - /* Find the last register pair that matched. 
*/ - for (end = regs.num_regs - 1; end >= 0; end--) - if (regs.start[end] != -1) - break; - - printf ("[%d ", regs.start[0]); - for (i = 1; i <= end; i++) - printf ("(%d %d) ", regs.start[i], regs.end[i]); - printf ("%d]", regs.end[0]); - } -} diff --git a/regex-0.12/test/main.c b/regex-0.12/test/main.c @@ -1,49 +0,0 @@ -/* Main routine for running various tests. Meant only to be linked with - all the auxiliary test source files, with `test' undefined. */ - -#include "test.h" - -test_type t = all_test; - - -/* Use this to run the tests we've thought of. */ - -int -main () -{ - switch (t) - { - case all_test: - test_regress (); - test_others (); - test_posix_basic (); - test_posix_extended (); - test_posix_interface (); - break; - - case other_test: - test_others (); - break; - - case posix_basic_test: - test_posix_basic (); - break; - - case posix_extended_test: - test_posix_extended (); - break; - - case posix_interface_test: - test_posix_interface (); - break; - - case regress_test: - test_regress (); - break; - - default: - fprintf (stderr, "Unknown test %d.\n", t); - } - - return 0; -} diff --git a/regex-0.12/test/malloc-test.c b/regex-0.12/test/malloc-test.c @@ -1,47 +0,0 @@ - - -typedef struct { - unsigned *bits; - unsigned size; -} bits_list_type; - -#define BYTEWIDTH 8 -#define NULL 0 - -#define BITS_BLOCK_SIZE (sizeof (unsigned) * BYTEWIDTH) -#define BITS_BLOCK(position) ((position) / BITS_BLOCK_SIZE) -#define BITS_MASK(position) (1 << ((position) % BITS_BLOCK_SIZE)) - -static unsigned -init_bits_list (bits_list_ptr) - bits_list_type *bits_list_ptr; -{ - bits_list_ptr->bits = NULL; - bits_list_ptr->bits = (unsigned *) malloc (sizeof (unsigned)); - - if (bits_list_ptr->bits == NULL) - return 0; - - bits_list_ptr->bits[0] = (unsigned)0; - bits_list_ptr->size = BITS_BLOCK_SIZE; - - return 1; -} - - -main() -{ - bits_list_type dummy; - bits_list_type dummy_1; - bits_list_type dummy_2; - bits_list_type dummy_3; - - init_bits_list (&dummy); -printf("init 1\n"); 
- init_bits_list (&dummy_1); -printf("init 2\n"); - init_bits_list (&dummy_2); -printf("init 3\n"); - init_bits_list (&dummy_3); -printf("init 4\n"); -} diff --git a/regex-0.12/test/other.c b/regex-0.12/test/other.c @@ -1,503 +0,0 @@ -/* other.c: test (not exhaustively) non-POSIX regular expressions. */ - -#include "test.h" - -void -test_others () -{ - struct re_registers regs; - - printf ("\nStarting non-POSIX tests.\n"); - t = other_test; - - test_should_match = true; - - /* The big question: does the group participate in the match, or match - the empty string? */ - re_set_syntax (RE_NO_BK_PARENS); - test_match ("(a*)*ab", "ab"); - TEST_REGISTERS ("(a*)*ab", "ab", 0, 2, 0, 0, -1, -1); - test_match ("(a*)*", ""); - TEST_REGISTERS ("(a*)*ab", "ab", 0, 0, 0, 0, -1, -1); - - /* This tests finding the highest and lowest active registers. */ - test_match ("(a(b)c(d(e)f)g)h(i(j)k(l(m)n)o)\\1\\2\\3\\4\\5\\6\\7\\8", - "abcdefghijklmnoabcdefgbdefeijklmnojlmnm"); - - /* Test that \< and \> match at the beginning and end of the string. */ - test_match ("\\<abc\\>", "abc"); - - /* May as well test \` and \' while we're at it. */ - test_match ("\\`abc\\'", "abc"); - -#if 0 - /* Test backreferencing and the fastmap -- which doesn't work. */ - test_fastmap ("(a)*\\1", "a", 0, 0); -#endif - - /* But at least we shouldn't search improperly. */ - test_search_return (-1, "(a)\\1", ""); - - re_set_syntax (RE_SYNTAX_EMACS); - - MATCH_SELF("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - MATCH_SELF ("a^"); - MATCH_SELF ("a^b"); - MATCH_SELF ("$a"); - MATCH_SELF ("a$b"); - - re_set_syntax (RE_BACKSLASH_ESCAPE_IN_LISTS); - test_match ("[\\^a]", "a"); - test_match ("[\\^a]", "^"); - - /* These op characters should be ordinary if RE_CONTEXT_INVALID_OPS - isn't set. 
*/ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_BRACES | RE_INTERVALS - | RE_NO_BK_PARENS); - MATCH_SELF ("*"); - test_match ("a|*", "*"); - test_match ("(*)", "*"); - - MATCH_SELF ("+"); - test_match ("a|+", "+"); - test_match ("(+)", "+"); - - MATCH_SELF ("?"); - test_match ("a|?", "?"); - test_match ("(?)", "?"); - - MATCH_SELF ("{1}"); - test_match ("a|{1}", "a"); - test_match ("a|{1}", "{1}"); - test_match ("({1})", "{1}"); - - test_match ("\\{", "{"); - - - re_set_syntax (RE_LIMITED_OPS); - MATCH_SELF ("|"); - MATCH_SELF ("a|"); - MATCH_SELF ("a|"); - MATCH_SELF ("a||"); - MATCH_SELF ("a||"); - MATCH_SELF ("(|)"); - - re_set_syntax (RE_SYNTAX_EMACS); - TEST_SEARCH ("^a", "b\na", 0, 3); - TEST_SEARCH ("b$", "b\na", 0, 3); - -#if 0 - /* Newline is no longer special for anchors (16 Sep 92). --karl */ - test_match_2 ("a\n^b", "a", "\nb"); - test_match_2 ("a$\nb", "a\n", "b"); -#endif - - /* Test grouping. */ - re_set_syntax (RE_NO_BK_PARENS); - - test_match ("()", ""); - test_fastmap ("()", "", 0, 0); - TEST_REGISTERS ("()", "", 0, 0, 0, 0, -1, -1); - - test_match ("((((((((()))))))))", ""); - test_fastmap ("((((((((()))))))))", "", 0, 0); - test_match ("a()b", "ab"); - TEST_REGISTERS ("a()b", "ab", 0, 2, 1, 1, -1, -1); - - test_match ("(((((((((())))))))))", ""); - test_fastmap ("(((((((((())))))))))", "", 0, 0); - - test_match ("()*", ""); - TEST_REGISTERS ("()*", "", 0, 0, 0, 0, -1, -1); /* empty string */ - test_match ("(())*", ""); - - re_set_syntax (RE_CONTEXT_INDEP_OPS); - test_match ("*", ""); - - re_set_syntax (RE_INTERVALS | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES); - test_match ("{1}", ""); /* Should remain an interval. */ - MATCH_SELF ("{1"); /* Not a valid interval. 
*/ - - re_set_syntax (RE_NEWLINE_ALT); - test_match ("a\nb", "a"); - test_match ("a\nb", "b"); - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - test_match ("^a", "a"); - test_match ("(^a)", "a"); - test_match ("(a|^b)", "b"); - test_match ("a$", "a"); - test_match ("(a$)", "a"); - test_match ("a$|b", "a"); - - /* You should be able to have empty alternatives if RE_NO_EMPTY_ALTS - isn't set. */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - - test_match ("|", ""); - test_match ("^|a", ""); - test_match ("^|a", "a"); - test_match ("a|", ""); - test_match ("a|", "a"); - test_match ("a|$", ""); - test_match ("a|$", "a"); - test_match ("a||b", "a"); - test_match ("a||b", ""); - test_match ("a||b", "b"); - test_match ("(|a)", ""); - test_match ("(|a)", "a"); - test_match ("(a|)", ""); - test_match ("(a|)", "a"); - - TEST_SEARCH ("a|$", "xa", 0, 2); - TEST_SEARCH ("a|$", "x", 0, 1); - TEST_SEARCH ("$|b", "x", 0, 1); - TEST_SEARCH ("$|b", "xb", 0, 2); - TEST_SEARCH ("c(a|$)", "xca", 0, 3); - TEST_SEARCH ("c(a|$)", "xc", 0, 2); - TEST_SEARCH ("c($|b)", "xcb", 0, 3); - TEST_SEARCH ("c($|b)", "xc", 0, 2); - TEST_SEARCH ("c($|b$)", "xcb", 0, 3); - TEST_SEARCH ("c($|b$)", "xc", 0, 2); - TEST_SEARCH ("c(a$|$)", "xca", 0, 3); - TEST_SEARCH ("c(a$|$)", "xc", 0, 2); - TEST_SEARCH ("(a$|b$)|$", "x", 0, 1); - TEST_SEARCH ("(a$|b$)|$", "xa", 0, 2); - TEST_SEARCH ("(a$|b$)|$", "xb", 0, 2); - TEST_SEARCH ("(a$|$)|c$", "x", 0, 1); - TEST_SEARCH ("(a$|$)|c$", "xa", 0, 2); - TEST_SEARCH ("(a$|$)|c$", "xc", 0, 2); - TEST_SEARCH ("($|b$)|c$", "x", 0, 1); - TEST_SEARCH ("($|b$)|c$", "xb", 0, 2); - TEST_SEARCH ("($|b$)|c$", "xc", 0, 2); - TEST_SEARCH ("c$|(a$|$)", "x", 0, 1); - TEST_SEARCH ("c$|(a$|$)", "xa", 0, 2); - TEST_SEARCH ("c$|(a$|$)", "xc", 0, 2); - TEST_SEARCH ("c$|($|b$)", "x", 0, 1); - TEST_SEARCH ("c$|($|b$)", "xb", 0, 2); - TEST_SEARCH ("c$|($|b$)", "xc", 0, 2); - TEST_SEARCH ("$|(a$|b$)", "x", 0, 1); - TEST_SEARCH ("$|(a$|b$)", "xa", 0, 2); - TEST_SEARCH 
("$|(a$|b$)", "xb", 0, 2); - TEST_SEARCH ("c(a$|b$)|$", "x", 0, 1); - TEST_SEARCH ("c(a$|b$)|$", "xca", 0, 3); - TEST_SEARCH ("c(a$|b$)|$", "xcb", 0, 3); - TEST_SEARCH ("c(a$|$)|d$", "xc", 0, 2); - TEST_SEARCH ("c(a$|$)|d$", "xca", 0, 3); - TEST_SEARCH ("c(a$|$)|d$", "xd", 0, 2); - TEST_SEARCH ("c($|b$)|d$", "xc", 0, 2); - TEST_SEARCH ("c($|b$)|d$", "xcb", 0, 3); - TEST_SEARCH ("c($|b$)|d$", "xd", 0, 2); - TEST_SEARCH ("d(c$|e((a$|$)))", "xdc", 0, 3); - TEST_SEARCH ("d(c$|e((a$|$)))", "xde", 0, 3); - TEST_SEARCH ("d(c$|e((a$|$)))", "xdea", 0, 4); - TEST_SEARCH ("d(c$|e(($|b$)))", "xdc", 0, 3); - TEST_SEARCH ("d(c$|e(($|b$)))", "xde", 0, 3); - TEST_SEARCH ("d(c$|e(($|b$)))", "xdeb", 0, 4); - TEST_SEARCH ("d($|e((a$|b$)))", "xd", 0, 2); - TEST_SEARCH ("d($|e((a$|b$)))", "xdea", 0, 4); - TEST_SEARCH ("d($|e((a$|b$)))", "xdeb", 0, 4); - TEST_SEARCH ("a(b$|c$)|$", "x", 0, 1); - TEST_SEARCH ("a(b$|c$)|$", "xab", 0, 3); - TEST_SEARCH ("a(b$|c$)|$", "xac", 0, 3); - TEST_SEARCH ("a(b$|$)|d$", "xa", 0, 2); - TEST_SEARCH ("a(b$|$)|d$", "xab", 0, 3); - TEST_SEARCH ("a(b$|$)|d$", "xd", 0, 2); - TEST_SEARCH ("a($|c$)|d$", "xa", 0, 2); - TEST_SEARCH ("a($|c$)|d$", "xac", 0, 3); - TEST_SEARCH ("a($|c$)|d$", "xd", 0, 2); - TEST_SEARCH ("d$|a(b$|$)", "xd", 0, 2); - TEST_SEARCH ("d$|a(b$|$)", "xa", 0, 2); - TEST_SEARCH ("d$|a(b$|$)", "xab", 0, 3); - TEST_SEARCH ("d$|a($|c$)", "xd", 0, 2); - TEST_SEARCH ("d$|a($|c$)", "xa", 0, 2); - TEST_SEARCH ("d$|a($|c$)", "xac", 0, 3); - TEST_SEARCH ("$|a(b$|c$)", "x", 0, 1); - TEST_SEARCH ("$|a(b$|c$)", "xab", 0, 3); - TEST_SEARCH ("$|a(b$|c$)", "xac", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "xab", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "xac", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "xd", 0, 2); - TEST_SEARCH ("(a)(b$|$)|d$", "xa", 0, 2); - TEST_SEARCH ("(a)(b$|$)|d$", "xab", 0, 3); - TEST_SEARCH ("(a)(b$|$)|d$", "xd", 0, 2); - TEST_SEARCH ("(a)($|c$)|d$", "xa", 0, 2); - TEST_SEARCH ("(a)($|c$)|d$", "xac", 0, 3); - TEST_SEARCH ("(a)($|c$)|d$", 
"xd", 0, 2); - TEST_SEARCH ("d$|(a)(b$|$)", "xd", 0, 2); - TEST_SEARCH ("d$|(a)(b$|$)", "xa", 0, 2); - TEST_SEARCH ("d$|(a)(b$|$)", "xab", 0, 3); - TEST_SEARCH ("d$|(a)($|c$)", "xd", 0, 2); - TEST_SEARCH ("d$|(a)($|c$)", "xa", 0, 2); - TEST_SEARCH ("d$|(a)($|c$)", "xac", 0, 3); - TEST_SEARCH ("$|(a)(b$|c$)", "x", 0, 1); - TEST_SEARCH ("$|(a)(b$|c$)", "xab", 0, 3); - TEST_SEARCH ("$|(a)(b$|c$)", "xac", 0, 3); - TEST_SEARCH ("d$|(c$|(a$|$))", "x", 0, 1); - TEST_SEARCH ("d$|(c$|(a$|$))", "xd", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|$))", "xc", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|$))", "xa", 0, 2); - TEST_SEARCH ("d$|(c$|($|b$))", "x", 0, 1); - TEST_SEARCH ("d$|(c$|($|b$))", "xd", 0, 2); - TEST_SEARCH ("d$|(c$|($|b$))", "xc", 0, 2); - TEST_SEARCH ("d$|(c$|($|b$))", "xb", 0, 2); - TEST_SEARCH ("d$|($|(a$|b$))", "x", 0, 1); - TEST_SEARCH ("d$|($|(a$|b$))", "xd", 0, 2); - TEST_SEARCH ("d$|($|(a$|b$))", "xa", 0, 2); - TEST_SEARCH ("d$|($|(a$|b$))", "xb", 0, 2); - TEST_SEARCH ("$|(c$|(a$|b$))", "x", 0, 1); - TEST_SEARCH ("$|(c$|(a$|b$))", "xc", 0, 2); - TEST_SEARCH ("$|(c$|(a$|b$))", "xa", 0, 2); - TEST_SEARCH ("$|(c$|(a$|b$))", "xb", 0, 2); - TEST_SEARCH ("d$|c(a$|$)", "xd", 0, 2); - TEST_SEARCH ("d$|c(a$|$)", "xc", 0, 2); - TEST_SEARCH ("d$|c(a$|$)", "xca", 0, 3); - TEST_SEARCH ("d$|c($|b$)", "xd", 0, 2); - TEST_SEARCH ("d$|c($|b$)", "xc", 0, 2); - TEST_SEARCH ("d$|c($|b$)", "xcb", 0, 3); - TEST_SEARCH ("$|c(a$|b$)", "x", 0, 1); - TEST_SEARCH ("$|c(a$|b$)", "xca", 0, 3); - TEST_SEARCH ("$|c(a$|b$)", "xcb", 0, 3); - TEST_SEARCH ("e(d$|c((a$|$)))", "xed", 0, 3); - TEST_SEARCH ("e(d$|c((a$|$)))", "xec", 0, 3); - TEST_SEARCH ("e(d$|c((a$|$)))", "xeca", 0, 3); - TEST_SEARCH ("e(d$|c(($|b$)))", "xed", 0, 3); - TEST_SEARCH ("e(d$|c(($|b$)))", "xec", 0, 3); - TEST_SEARCH ("e(d$|c(($|b$)))", "xecb", 0, 4); - TEST_SEARCH ("e($|c((a$|b$)))", "xe", 0, 2); - TEST_SEARCH ("e($|c((a$|b$)))", "xeca", 0, 4); - TEST_SEARCH ("e($|c((a$|b$)))", "xecb", 0, 4); - TEST_SEARCH ("ed$|(c((a$|$)))", 
"xed", 0, 3); - TEST_SEARCH ("ed$|(c((a$|$)))", "xc", 0, 2); - TEST_SEARCH ("ed$|(c((a$|$)))", "xca", 0, 3); - TEST_SEARCH ("ed$|(c(($|b$)))", "xed", 0, 3); - TEST_SEARCH ("ed$|(c(($|b$)))", "xc", 0, 2); - TEST_SEARCH ("ed$|(c(($|b$)))", "xcb", 0, 3); - TEST_SEARCH ("$|(c((a$|b$)))", "x", 0, 1); - TEST_SEARCH ("$|(c((a$|b$)))", "xca", 0, 3); - TEST_SEARCH ("$|(c((a$|b$)))", "xcb", 0, 3); - TEST_SEARCH ("d$|($|(a|b)$)", "x", 0, 1); - TEST_SEARCH ("d$|($|(a|b)$)", "xa", 0, 2); - TEST_SEARCH ("d$|($|(a|b)$)", "xb", 0, 2); - TEST_SEARCH ("$|(c$|(a|b)$)", "x", 0, 1); - TEST_SEARCH ("$|(c$|(a|b)$)", "xc", 0, 2); - TEST_SEARCH ("$|(c$|(a|b)$)", "xa", 0, 2); - TEST_SEARCH ("$|(c$|(a|b)$)", "xb", 0, 2); - - re_set_syntax (0); - test_match ("[^\n]", "a"); - test_match ("[^a]", "\n"); - - TEST_SEARCH ("^a", "b\na", 0, 3); - TEST_SEARCH ("b$", "b\na", 0, 3); - - test_case_fold ("[!-`]", "A"); - test_case_fold ("[!-`]", "a"); - - re_set_syntax (RE_CONTEXT_INDEP_OPS | RE_NO_BK_VBAR | RE_NO_BK_PARENS - | RE_NO_BK_BRACES | RE_INTERVALS); - valid_nonposix_pattern ("()^a"); - valid_nonposix_pattern ("()\\1^a"); - - /* Per Cederqvist (cedar@lysator.liu.se) bug. */ - - re_set_syntax (RE_SYNTAX_EMACS); - - /* One `a' before the \n and 638 a's after it. 
*/ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "a\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - /* No a's before the \n and 639 a's after it. */ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - /* One `a' before the \n and 639 a's after it. 
*/ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "a\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - /* No a's before the \n and 640 a's after it. */ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - TEST_SEARCH ("^(^a)", "ab", 0, 2); - TEST_SEARCH ("(a$)$", "ba", 0, 2); - test_match ("a|$b", "$b"); - - /* Mike's curiosity item. */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - test_all_registers ("(foo|foobar)(foo|bar)*\\1(foo|bar)*", - "foobarfoobar", "", - 0, 12, 0, 3, 3, 6, 9, 12, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1); - - /* Another one from Mike. */ - test_match ("(foo|foobarfoo)(bar)*", "foobarfoo"); - - /* And another. 
*/ - test_match("(foo|foobar)(bar|barfoo)?\\1", "foobarfoobar"); - - re_set_syntax (RE_NO_BK_PARENS | RE_INTERVALS | RE_NO_BK_VBAR - | RE_NO_BK_BRACES); /* xx get new ones from ext.*/ - test_match ("((a{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)*", "bb"); - test_all_registers ("((a{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)*", "", "bb", - 0, 2, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1); - - test_match ("((a+?*{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)", "b"); - test_all_registers ("((a+?*{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)", "", "b", - 0, 1, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1); - - /* Valid anchoring. */ - /* See generic_test.c and extended_test.c for more search - tests. xx Not sure all these tests are represented in the - search tests. */ - - re_set_syntax (RE_NO_BK_PARENS | RE_NO_BK_VBAR); - valid_nonposix_pattern - ("(((((((((((((((((((((((((((((((((^a)))))))))))))))))))))))))))))))))"); - valid_nonposix_pattern - ("(((((((((((((((((((((((((((((((((a$)))))))))))))))))))))))))))))))))"); - valid_nonposix_pattern ("\\b\\B\\<\\>\\`\\'^a"); - valid_nonposix_pattern ("a$\\b\\B\\<\\>\\`\\'"); - valid_nonposix_pattern ("(^a)"); - valid_nonposix_pattern ("(a$)"); - valid_nonposix_pattern ("(^a)b"); - valid_nonposix_pattern ("b(a$)"); - valid_nonposix_pattern ("(^a|^b)c"); - valid_nonposix_pattern ("c(a$|b$)"); - valid_nonposix_pattern ("(^a|^b)|^c"); - valid_nonposix_pattern ("(a$|b$)|c$"); - valid_nonposix_pattern ("^c|(^a|^b)"); - valid_nonposix_pattern ("c$|(a$|b$)"); - valid_nonposix_pattern ("(^a|^b)c|^d"); - valid_nonposix_pattern ("c(a$|b$)|d$"); - valid_nonposix_pattern ("(((^a|^b))c|^d)e"); - valid_nonposix_pattern ("(c((a|b))|d)e$"); - valid_nonposix_pattern ("^d(c|e((a|b)))"); - valid_nonposix_pattern ("d(c$|e((a$|b$)))"); - valid_nonposix_pattern ("(((^a|^b))c)|^de"); - valid_nonposix_pattern ("(((a|b))c$)|de$"); - - valid_nonposix_pattern ("((a$)$)$"); - valid_nonposix_pattern ("^(^(^a))"); - - 
valid_nonposix_pattern ("^de|^(c((a|b)))"); - valid_nonposix_pattern ("^de|(^c((a|b)))"); - valid_nonposix_pattern ("de$|(c((a|b)$))"); - valid_nonposix_pattern ("de$|(c((a|b))$)"); - valid_nonposix_pattern ("de$|(c((a|b)))$"); - - valid_nonposix_pattern ("^a(b|c)|^d"); - valid_nonposix_pattern ("a(b$|c$)|d$"); - valid_nonposix_pattern ("^d|^a(b|c)"); - valid_nonposix_pattern ("d$|a(b$|c$)"); - valid_nonposix_pattern ("^d|^(b|c)a"); - valid_nonposix_pattern ("d$|(b|c)a$"); - valid_nonposix_pattern ("^(a)(b|c)|^d"); - valid_nonposix_pattern ("(a)(b|c)$|d$"); - valid_nonposix_pattern ("(^a)(b|c)|^d"); - valid_nonposix_pattern ("(a)(b$|c$)|d$"); - valid_nonposix_pattern ("^d|^(b|c)(a)"); - valid_nonposix_pattern ("d$|(b|c)(a)$"); - valid_nonposix_pattern ("^d|(^b|^c)(a)"); - valid_nonposix_pattern ("d$|(b|c)(a$)"); - valid_nonposix_pattern ("^d|^(a)(b|c)"); - valid_nonposix_pattern ("^d|(^a)(b|c)"); - valid_nonposix_pattern ("d$|(a)(b$|c$)"); - valid_nonposix_pattern ("((^a|^b)|^c)|^d"); - valid_nonposix_pattern ("d$|(c$|(a$|b$))"); - - - /* Tests shouldn't match. */ - test_should_match = false; - - /* Test that RE_CONTEXT_INVALID_OPS has precedence over - RE_CONTEXT_INDEP_OPS. */ - - re_set_syntax (RE_CONTEXT_INDEP_OPS | RE_CONTEXT_INVALID_OPS - | RE_NO_BK_VBAR | RE_NO_BK_PARENS - | RE_NO_BK_BRACES | RE_INTERVALS); - INVALID_PATTERN ("*"); - INVALID_PATTERN ("^*"); - INVALID_PATTERN ("a|*"); - INVALID_PATTERN ("(*)"); - - INVALID_PATTERN ("^+"); - INVALID_PATTERN ("+"); - INVALID_PATTERN ("a|+"); - INVALID_PATTERN ("(+)"); - - INVALID_PATTERN ("^?"); - INVALID_PATTERN ("?"); - INVALID_PATTERN ("a|?"); - INVALID_PATTERN ("(?)"); - - INVALID_PATTERN ("^{1}"); - INVALID_PATTERN ("{1}"); - INVALID_PATTERN ("a|{1}"); - INVALID_PATTERN ("({1})"); - -#if 0 - /* No longer have this syntax option -- POSIX says empty alternatives - are undefined as of draft 11.2. */ - - /* You can't have empty alternatives if RE_NO_EMPTY_ALTS is set. 
*/ - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_ALTS); - - INVALID_PATTERN ("|"); - INVALID_PATTERN ("^|a"); - INVALID_PATTERN ("a|"); - INVALID_PATTERN ("a||"); - INVALID_PATTERN ("a||b"); - INVALID_PATTERN ("(|a)"); - INVALID_PATTERN ("(a|)"); - INVALID_PATTERN ("(a|)"); - - - /* Test above with `\(' and `\)'. */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_EMPTY_ALTS); - INVALID_PATTERN ("\\(|a\\)"); - INVALID_PATTERN ("\\(a|\\)"); - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_ALTS); - INVALID_PATTERN ("(|)()$|d$"); -#endif - - /* Test grouping. */ - test_match ("()", "a"); - - /* Test backslashed intervals that are CONTEXTly invalid if have - nothing on which to operate. */ - - re_set_syntax (RE_INTERVALS | RE_CONTEXT_INVALID_OPS); - INVALID_PATTERN ("\\{1\\}"); - - re_set_syntax (0); - test_match ("z-a", "a"); - - re_set_syntax (RE_BK_PLUS_QM); - INVALID_PATTERN ("a*\\"); - - re_set_syntax (0); - INVALID_PATTERN ("a*\\"); - - re_set_syntax (RE_BACKSLASH_ESCAPE_IN_LISTS); - INVALID_PATTERN ("[\\"); - -#if 0 - /* Empty groups are always ok now. (13 Sep 92) */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_GROUPS); - INVALID_PATTERN ("(|)()$|d$"); -#endif - - printf ("\nFinished non-POSIX tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex-0.12/test/printchar.c b/regex-0.12/test/printchar.c @@ -1,14 +0,0 @@ -void -printchar (c) - char c; -{ - if (c < 040 || c >= 0177) - { - putchar ('\\'); - putchar (((c >> 6) & 3) + '0'); - putchar (((c >> 3) & 7) + '0'); - putchar ((c & 7) + '0'); - } - else - putchar (c); -} diff --git a/regex-0.12/test/psx-basic.c b/regex-0.12/test/psx-basic.c @@ -1,253 +0,0 @@ -/* psx-basic.c: Test POSIX basic regular expressions. */ - -#include "test.h" - - -void -test_posix_basic () -{ - /* Intervals can only match up to RE_DUP_MAX occurences of anything. 
*/ - char dup_max_plus_one[6]; - sprintf (dup_max_plus_one, "%d", RE_DUP_MAX + 1); - - printf ("\nStarting POSIX basic tests.\n"); - t = posix_basic_test; - - re_set_syntax (RE_SYNTAX_POSIX_MINIMAL_BASIC); - - test_posix_generic (); - - printf ("\nContinuing POSIX basic tests.\n"); - -/* Grouping tests that are not the same. */ - - test_should_match = false; - invalid_pattern (REG_EPAREN, PARENS_TO_OPS ("a)")); - - test_should_match = true; - /* Special characters. */ - MATCH_SELF ("*"); - test_match ("\\(*\\)", "*"); - test_match ("\\(^*\\)", "*"); - test_match ("**", "***"); - test_match ("***", "****"); - - MATCH_SELF ("{"); /* of extended... */ - MATCH_SELF ("()"); /* also non-Posix. */ - MATCH_SELF ("a+"); - MATCH_SELF ("a?"); - MATCH_SELF ("a|b"); - MATCH_SELF ("a|"); /* No alternations, */ - MATCH_SELF ("|a"); /* so OK if empty. */ - MATCH_SELF ("a||"); - test_match ("\\(|a\\)", "|a"); - test_match ("\\(a|\\)", "a|"); - test_match ("a\\+", "a+"); - test_match ("a\\?", "a?"); - test_match ("a\\|b", "a|b"); - test_match ("^*", "*"); - test_match ("^+", "+"); - test_match ("^?", "?"); - test_match ("^{", "{"); - /* Valid subexpressions - (empty) in basic only. */ - test_match ("\\(\\)", ""); - - test_match ("a\\(\\)", "a"); - test_match ("\\(\\)b", "b"); - test_match ("a\\(\\)b", "ab"); - TEST_REGISTERS ("a\\(\\)b", "ab", 0, 2, 1, 1, -1, -1); - - test_match ("\\(\\)*", ""); - test_match ("\\(\\(\\)\\)*", ""); - /* Valid back references. */ - - /* N.B.: back references to subexpressions that include a * are - undefined in the spec. The tests are in here to see if we handle - the situation consistently, but if it fails any of them, it doesn't - matter. 
*/ - - test_match ("\\(\\)\\1", ""); - TEST_REGISTERS ("\\(\\)\\1", "", 0, 0, 0, 0, -1, -1); - - test_match ("\\(\\(\\)\\)\\(\\)\\2", ""); - - test_match ("\\(a\\)\\1", "aa"); - TEST_REGISTERS ("\\(a\\)\\1", "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\1", "xaax", 1, 3, 1, 2, -1, -1); - - test_match ("\\(\\(a\\)\\)\\1", "aa"); - test_match ("\\(a\\)\\(b\\)\\2\\1", "abba"); - - test_match ("\\(a\\)*\\1", "aa"); - TEST_REGISTERS ("\\(a\\)*\\1", "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)*\\1", "xaax", 0, 0, -1, -1, -1, -1); - - test_match ("\\(\\(a\\)\\2b\\)*", "aab"); - TEST_REGISTERS ("\\(\\(a\\)\\2b\\)*", "aab", 0, 3, 0, 3, 0, 1); - TEST_REGISTERS ("\\(\\(a\\)\\2b\\)*", "xaabx", 0, 0, -1, -1, -1, -1); - - test_match ("\\(a*\\)*\\1", ""); - test_match ("\\(a*\\)*\\1", "aa"); - TEST_REGISTERS ("\\(a*\\)*\\1", "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a*\\)*\\1", "xaax", 0, 0, 0, 0, -1, -1); - - test_match ("\\(a*\\)*\\1", ""); - test_match ("\\(a*\\)*\\1", "aa"); - test_match ("\\(\\(a*\\)*\\)*\\1", "aa"); - test_match ("\\(ab*\\)*\\1", "abab"); - TEST_REGISTERS ("\\(ab*\\)*\\1", "abab", 0, 4, 0, 2, -1, -1); - TEST_REGISTERS ("\\(ab*\\)*\\1", "xababx", 0, 0, -1, -1, -1, -1); - - test_match ("\\(a*\\)ab\\1", "aaba"); - TEST_REGISTERS ("\\(a*\\)ab\\1", "aaba", 0, 4, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a*\\)ab\\1", "xaabax", 1, 5, 1, 2, -1, -1); - - test_match ("\\(a*\\)*ab\\1", "aaba"); - TEST_REGISTERS ("\\(a*\\)*ab\\1", "aaba", 0, 4, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a*\\)*ab\\1", "xaabax", 1, 5, 1, 2, -1, -1); - - test_match ("\\(\\(a*\\)b\\)*\\2", "abb"); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "abb", 0, 3, 2, 3, 2, 2); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "xabbx", 0, 0, -1, -1, -1, -1); - - /* Different from above. 
*/ - test_match ("\\(\\(a*\\)b*\\)*\\2", "aa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aa", 0, 2, 0, 1, 0, 1); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "aba"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aba", 0, 3, 0, 2, 0, 1); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xabax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b\\)*\\2", "aababa"); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "aababa", 0, 6, 3, 5, 3, 4); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "xaababax", 0, 0, -1, -1, -1, -1); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "aabaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aabaa", 0, 5, 0, 3, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaabaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "aabbaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aabbaa", 0, 6, 0, 4, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaabbaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "abaabaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "abaabaa", 0, 7, 2, 5, 2, 4); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaababaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*a\\2", "aabaaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*a\\)*\\2", "aabaaa", 0, 6, 0, 3, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*a\\)*\\2", "xaabaax", 0, 0, -1, -1, -1, -1); - - test_match ("\\(\\(a*\\)b*\\)*\\2a", "aabaaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2a", "aabaaa", 0, 6, 0, 3, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2a", "xaabaaax", 1, 7, 1, 4, 1, 3); - - test_match ("\\(\\(a*\\)b\\)*\\2\\1", "abaabaaaab"); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2\\1", "abaabaaaab", 0, 10, 2, 5, 2, 4); - /* We are matching the empty string here. 
*/ - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2\\1", "xabaabaaaabx", 0, 0, -1, -1, -1, -1); - - test_match ("\\(a*b\\)\\1", "abab"); - test_match ("\\(a\\)\\1\\1", "aaa"); - test_match ("\\(a\\(c\\)d\\)\\1\\2", "acdacdc"); - - test_match ("\\(a\\)\\1*", "aaa"); - TEST_REGISTERS ("\\(a\\)\\1*", "aaa", 0, 3, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\1*", "xaaax", 1, 4, 1, 2, -1, -1); - - test_match ("\\(a\\)\\{1,3\\}b\\1", "aba"); - TEST_REGISTERS ("\\(a\\)\\{1,3\\}b\\1", "aba", 0, 3, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\{1,3\\}b\\1", "xabax", 1, 4, 1, 2, -1, -1); - - test_match ("\\(\\(a\\)\\2\\)*", "aaaa"); /* rms? */ - TEST_REGISTERS ("\\(\\(a*b\\)\\2\\)*", "bbabab", 0, 6, 2, 6, 2, 4); /* rms? */ - - test_match ("\\(\\(a\\)\\1\\)*", "a1a1"); - - test_match ("\\(\\(a\\)\\2\\)\\1", "aaaa"); - - test_match ("\\(\\(a*\\)\\2\\)\\1", "aaaa"); - TEST_REGISTERS ("\\(\\(a*\\)\\2\\)\\1", "aaaa", 0, 4, 0, 2, 0, 1); - TEST_REGISTERS ("\\(\\(a*\\)\\2\\)\\1", "xaaaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\{1\\}", "{1}"); - test_match ("^\\{1\\}", "{1}"); - - test_match ("\\(a\\)\\1\\{1,2\\}", "aaa"); - TEST_REGISTERS ("\\(a\\)\\1\\{1,2\\}", "aaa", 0, 3, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\1\\{1,2\\}", "xaaax", 1, 4, 1, 2, -1, -1); - - - /* Per POSIX D11.1 p. 109, leftmost longest match. */ - - test_match (PARENS_TO_OPS ("(.*).*\\1"), "abcabc"); - - - /* Per POSIX D11.1, p. 125, leftmost longest match. */ - - test_match (PARENS_TO_OPS ("(ac*)c*d[ac]*\\1"), "acdacaaa"); - TEST_REGISTERS (PARENS_TO_OPS ("(ac*)c*d[ac]*\\1"), "acdacaaa", - 0, 8, 0, 1, -1, -1); - - /* Anchors become ordinary, sometimes. 
*/ - MATCH_SELF ("a^"); - MATCH_SELF ("$a"); - MATCH_SELF ("$^"); - test_fastmap ("$a^", "$", 0, 0); - test_match ("$^*", "$^^"); - test_match ("\\($^\\)", "$^"); - test_match ("$*", "$$"); - /* xx -- known bug, solution pending test_match ("^^$", "^"); */ - test_match ("$\\{0,\\}", "$$"); - TEST_SEARCH ("^$*", "$$", 0, 2); - TEST_SEARCH ("^$\\{0,\\}", "$$", 0, 2); - MATCH_SELF ("2^10"); - MATCH_SELF ("$HOME"); - MATCH_SELF ("$1.35"); - - - /* Basic regular expressions, continued; these don't match their strings. */ - test_should_match = false; - - invalid_pattern (REG_EESCAPE, "\\(a\\"); - /* Invalid back references. */ - test_match ("\\(a\\)\\1", "ab"); - test_match ("\\(a\\)\\1\\1", "aab"); - test_match ("\\(a\\)\\(b\\)\\2\\1", "abab"); - test_match ("\\(a\\(c\\)d\\)\\1\\2", "acdc"); - test_match ("\\(a*b\\)\\1", "abaab"); - test_match ("\\(a\\)\\1*", "aaaaaaaaaab"); - test_match ("\\(\\(a\\)\\1\\)*", "aaa"); - invalid_pattern (REG_ESUBREG, "\\1"); - invalid_pattern (REG_ESUBREG, "\\(a\\)\\2"); - test_match ("\\(\\(a\\)\\2\\)*", "abaa"); - test_match ("\\(\\(a\\)\\1\\)*", "a"); - test_match ("\\(\\(a\\)\\2\\)\\1", "abaa"); - test_match ("\\(\\(a*\\)\\2\\)\\1", "abaa"); - /* Invalid intervals. */ - invalid_pattern (REG_EBRACE, "a\\{"); - - invalid_pattern (REG_BADBR, "a\\{-1"); - invalid_pattern (REG_BADBR, concat ("a\\{", (char *)dup_max_plus_one)); - invalid_pattern (REG_BADBR, concat (concat ("a\\{", (char *)dup_max_plus_one), ",")); - invalid_pattern (REG_BADBR, "a\\{1,0"); - - invalid_pattern (REG_EBRACE, "a\\{1"); - invalid_pattern (REG_EBRACE, "a\\{0,"); - invalid_pattern (REG_EBRACE, "a\\{0,1"); - invalid_pattern (REG_EBRACE, "a\\{0,1}"); - - printf ("\nFinished POSIX basic tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex-0.12/test/psx-extend.c b/regex-0.12/test/psx-extend.c @@ -1,1244 +0,0 @@ -/* psx-extend.c: Test POSIX extended regular expressions. 
*/ - -#include "test.h" - - -void -test_posix_extended () -{ - /* Intervals can only match up to RE_DUP_MAX occurences of anything. */ - char dup_max_plus_one[6]; - sprintf (dup_max_plus_one, "%d", RE_DUP_MAX + 1); - - - printf ("\nStarting POSIX extended tests.\n"); - t = posix_extended_test; - - re_set_syntax (RE_SYNTAX_POSIX_MINIMAL_EXTENDED); - - test_posix_generic (); - - printf ("\nContinuing POSIX extended tests.\n"); - - /* Grouping tests that differ from basic's. */ - - test_should_match = true; - MATCH_SELF ("a)"); - - /* Valid use of special characters. */ - test_match ("\\(a", "(a"); - test_match ("a\\+", "a+"); - test_match ("a\\?", "a?"); - test_match ("\\{a", "{a"); - test_match ("\\|a", "|a"); - test_match ("a\\|b", "a|b"); - test_match ("a\\|?", "a"); - test_match ("a\\|?", "a|"); - test_match ("a\\|*", "a"); - test_match ("a\\|*", "a||"); - test_match ("\\(*\\)", ")"); - test_match ("\\(*\\)", "(()"); - test_match ("a\\|+", "a|"); - test_match ("a\\|+", "a||"); - test_match ("\\(+\\)", "()"); - test_match ("\\(+\\)", "(()"); - test_match ("a\\||b", "a|"); - test_match ("\\(?\\)", ")"); - test_match ("\\(?\\)", "()"); - - test_match ("a+", "a"); - test_match ("a+", "aa"); - test_match ("a?", ""); - test_match ("a?", "a"); - - /* Bracket expressions. */ - test_match ("[(]", "("); - test_match ("[+]", "+"); - test_match ("[?]", "?"); - test_match ("[{]", "{"); - test_match ("[|]", "|"); - /* Subexpressions. */ - test_match ("(a+)*", ""); - test_match ("(a+)*", "aa"); - test_match ("(a?)*", ""); - test_match ("(a?)*", "aa"); - /* (No) back references. */ - test_match ("(a)\\1", "a1"); - /* Invalid as intervals, - but are valid patterns. 
*/ - MATCH_SELF ("{"); - test_match ("^{", "{"); - test_match ("a|{", "{"); - test_match ("({)", "{"); - MATCH_SELF ("a{"); - MATCH_SELF ("a{}"); - MATCH_SELF ("a{-1"); - MATCH_SELF ("a{-1}"); - MATCH_SELF ("a{0"); - MATCH_SELF ("a{0,"); - MATCH_SELF (concat ("a{", dup_max_plus_one)); - MATCH_SELF (concat (concat ("a{", dup_max_plus_one), ",")); - MATCH_SELF ("a{1,0"); - MATCH_SELF ("a{1,0}"); - MATCH_SELF ("a{0,1"); - test_match ("[a{0,1}]", "}"); - test_match ("a{1,3}{-1}", "aaa{-1}"); - test_match (concat ("a{1,3}{", dup_max_plus_one), - concat ("aaa{", dup_max_plus_one)); - test_match ("a{1,3}{2,1}", "aaa{2,1}"); - test_match ("a{1,3}{1,2", "aaa{1,2"); - /* Valid consecutive repetitions. */ - test_match ("a*+", "a"); - test_match ("a*?", "a"); - test_match ("a++", "a"); - test_match ("a+*", "a"); - test_match ("a+?", "a"); - test_match ("a??", "a"); - test_match ("a?*", "a"); - test_match ("a?+", "a"); - - test_match ("a{2}?", ""); - test_match ("a{2}?", "aa"); - test_match ("a{2}+", "aa"); - test_match ("a{2}{2}", "aaaa"); - - test_match ("a{1}?*", ""); - test_match ("a{1}?*", "aa"); - - test_match ("(a?){0,3}b", "aaab"); - test_fastmap ("(a?){0,3}b", "ab", 0, 0); - test_match ("(a+){0,3}b", "b"); - test_fastmap ("(a+){0,3}b", "ab", 0, 0); - test_match ("(a+){0,3}b", "ab"); - test_fastmap ("(a+){0,3}b", "ab", 0, 0); - test_match ("(a+){1,3}b", "aaab"); - test_match ("(a?){1,3}b", "aaab"); - - test_match ("\\\\{1}", "\\"); /* Extended only. */ - - test_match ("(a?)?", "a"); - test_match ("(a?b)?c", "abc"); - test_match ("(a+)*b", "b"); - /* Alternatives. */ - test_match ("a|b", "a"); - test_match ("a|b", "b"); - test_fastmap ("a|b", "ab", 0, 0); - - TEST_SEARCH ("a|b", "cb", 0, 2); - TEST_SEARCH ("a|b", "cb", 0, 2); - - test_match ("(a|b|c)", "a"); - test_match ("(a|b|c)", "b"); - test_match ("(a|b|c)", "c"); - - test_match ("(a|b|c)*", "abccba"); - - test_match ("(a(b*))|c", "a"); /* xx do registers. 
*/ - test_match ("(a(b*))|c", "ab"); - test_match ("(a(b*))|c", "c"); - - test_fastmap ("(a+?*|b)", "ab", 0, 0); - test_match ("(a+?*|b)", "b"); - TEST_REGISTERS ("(a+?*|b)", "b", 0, 1, 0, 1, -1, -1); - - test_fastmap ("(a+?*|b)*", "ab", 0, 0); - test_match ("(a+?*|b)*", "bb"); - TEST_REGISTERS ("(a+?*|b)*", "bb", 0, 2, 1, 2, -1, -1); - - test_fastmap ("(a*|b)*", "ab", 0, 0); - test_match ("(a*|b)*", "bb"); - TEST_REGISTERS ("(a*|b)*", "bb", 0, 2, 1, 2, -1, -1); - - test_fastmap ("((a*)|b)*", "ab", 0, 0); - test_match ("((a*)|b)*", "bb"); - TEST_REGISTERS ("((a*)|b)*", "bb", 0, 2, 1, 2, 1, 1); - - test_fastmap ("(a{0,}|b)*", "ab", 0, 0); - test_match ("(a{0,}|b)*", "bb"); - TEST_REGISTERS ("(a{0,}|b)*", "bb", 0, 2, 1, 2, -1, -1); - - test_fastmap ("((a{0,})|b)*", "ab", 0, 0); - test_match ("((a{0,})|b)*", "bb"); - TEST_REGISTERS ("((a{0,})|b)*", "bb", 0, 2, 1, 2, 1, 1); - - /* With c's */ - test_fastmap ("(a+?*|b)c", "abc", 0, 0); - test_match ("(a+?*|b)c", "bc"); - TEST_REGISTERS ("(a+?*|b)c", "bc", 0, 2, 0, 1, -1, -1); - - test_fastmap ("(a+?*|b)*c", "abc", 0, 0); - test_match ("(a+?*|b)*c", "bbc"); - TEST_REGISTERS ("(a+?*|b)*c", "bbc", 0, 3, 1, 2, -1, -1); - - test_fastmap ("(a*|b)*c", "abc", 0, 0); - test_match ("(a*|b)*c", "bbc"); - TEST_REGISTERS ("(a*|b)*c", "bbc", 0, 3, 1, 2, -1, -1); - - test_fastmap ("((a*)|b)*c", "abc", 0, 0); - test_match ("((a*)|b)*c", "bbc"); - TEST_REGISTERS ("((a*)|b)*c", "bbc", 0, 3, 1, 2, 1, 1); - - test_fastmap ("(a{0,}|b)*c", "abc", 0, 0); - test_match ("(a{0,}|b)*c", "bbc"); - TEST_REGISTERS ("(a{0,}|b)*c", "bbc", 0, 3, 1, 2, -1, -1); - - test_fastmap ("((a{0,})|b)*c", "abc", 0, 0); - test_match ("((a{0,})|b)*c", "bbc"); - TEST_REGISTERS ("((a{0,})|b)*c", "bbc", 0, 3, 1, 2, 1, 1); - - - test_fastmap ("((a{0,}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a{0,}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a{0,}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a{0,}\\b\\<)|b)*", "ab", 0, 0); - test_match ("((a{0,}\\b\\<)|b)*", 
"b"); - TEST_REGISTERS ("((a{0,}\\b\\<)|b)*", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,1}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,1}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,1}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,2}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,2}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,2}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - - test_fastmap ("((a+?*{0,4095}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,4095}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,4095}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,5119}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,5119}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,5119}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,6143}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,6143}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,6143}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,8191}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,8191}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,8191}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,16383}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,16383}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,16383}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - - test_fastmap ("((a+?*{0,}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,}\\b\\<)|b)*", "ab", 0, 0); - test_match ("((a+?*{0,}\\b\\<)|b)*", "b"); - TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)*", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,}\\b\\<)|b)*", "ab", 0, 0); - test_match ("((a+?*{0,}\\b\\<)|b)*", "bb"); - TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)*", "bb", - 0, 2, 1, 2, 0, 0); - - - /* `*' after group. 
*/ - test_match ("(a*|b*)*c", "c"); - TEST_REGISTERS ("(a*|b*)*c", "c", 0, 1, 0, 0, -1, -1); - - test_match ("(a*|b*)*c", "ac"); - TEST_REGISTERS ("(a*|b*)*c", "ac", 0, 2, 0, 1, -1, -1); - - test_match ("(a*|b*)*c", "aac"); - TEST_REGISTERS ("(a*|b*)*c", "aac", 0, 3, 0, 2, -1, -1); - - test_match ("(a*|b*)*c", "bbc"); - TEST_REGISTERS ("(a*|b*)*c", "bbc", 0, 3, 0, 2, -1, -1); - - test_match ("(a*|b*)*c", "abc"); - TEST_REGISTERS ("(a*|b*)*c", "abc", 0, 3, 1, 2, -1, -1); - - /* No `*' after group. */ - test_match ("(a*|b*)c", "c"); - TEST_REGISTERS ("(a*|b*)c", "c", 0, 1, 0, 0, -1, -1); - - test_match ("(a*|b*)c", "ac"); - TEST_REGISTERS ("(a*|b*)c", "ac", 0, 2, 0, 1, -1, -1); - - test_match ("(a*|b*)c", "bc"); - TEST_REGISTERS ("(a*|b*)c", "bc", 0, 2, 0, 1, -1, -1); - - test_match ("(a*|b*)c", "aac"); - TEST_REGISTERS ("(a*|b*)c", "aac", 0, 3, 0, 2, -1, -1); - - /* Same as above, but with no `*'s in alternatives. - - test_match ("(a|b)*c", "c"); /* `*' after group. */ - TEST_REGISTERS ("(a|b)*c", "c", 0, 1, -1, -1, -1, -1); - - test_match ("(a|b)*c", "ac"); - TEST_REGISTERS ("(a|b)*c", "ac", 0, 2, 0, 1, -1, -1); - - test_match ("(a|b)*c", "bc"); - TEST_REGISTERS ("(a|b)*c", "bc", 0, 2, 0, 1, -1, -1); - - test_match ("(a|b)*c", "abc"); - TEST_REGISTERS ("(a|b)*c", "abc", 0, 3, 1, 2, -1, -1); - - - test_match ("(a*|b*)c", "bbc"); - TEST_REGISTERS ("(a*|b*)c", "bbc", 0, 3, 0, 2, -1, -1); - - /* Complicated second alternative. 
*/ - - test_match ("(a*|(b*)*)*c", "bc"); - TEST_REGISTERS ("(a*|(b*)*)*c", "bc", 0, 2, 0, 1, 0, 1); - - test_match ("(a*|(b*|c*)*)*d", "bd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "bd", 0, 2, 0, 1, 0, 1); - - test_match ("(a*|(b*|c*)*)*d", "bbd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "bbd", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|(b*|c*)*)*d", "cd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "cd", 0, 2, 0, 1, 0, 1); - - test_match ("(a*|(b*|c*)*)*d", "ccd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "ccd", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|b*|c*)*d", "aad"); - TEST_REGISTERS ("(a*|b*|c*)*d", "aad", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|b*|c*)*d", "bbd"); - TEST_REGISTERS ("(a*|b*|c*)*d", "bbd", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|b*|c*)*d", "ccd"); - TEST_REGISTERS ("(a*|b*|c*)*d", "ccd", 0, 3, 0, 2, 0, 2); - - /* Valid anchoring. */ - valid_pattern ("a^"); - valid_pattern ("a^b"); - valid_pattern ("$a"); - valid_pattern ("a$b"); - valid_pattern ("foo^bar"); - valid_pattern ("foo$bar"); - valid_pattern ("(^)"); - valid_pattern ("($)"); - valid_pattern ("(^$)"); - - /* These are the same (but valid) as those (invalid) in other_test.c. 
*/ - valid_pattern - ("(((((((((((((((((((((((((((((((((a^)))))))))))))))))))))))))))))))))"); - valid_pattern - ("((((((((((((((((((((((((((((((((($a)))))))))))))))))))))))))))))))))"); - valid_pattern ("\\(^a\\)"); - valid_pattern ("a\\|^b"); - valid_pattern ("\\w^a"); - valid_pattern ("\\W^a"); - valid_pattern ("(a^)"); - valid_pattern ("($a)"); - valid_pattern ("a(^b)"); - valid_pattern ("a$(b)"); - valid_pattern ("(a)^b"); - valid_pattern ("(a)$b"); - valid_pattern ("(a)(^b)"); - valid_pattern ("(a$)(b)"); - valid_pattern ("(a|b)^c"); - valid_pattern ("(a|b)$c"); - valid_pattern ("(a$|b)c"); - valid_pattern ("(a|b$)c"); - valid_pattern ("a(b|^c)"); - valid_pattern ("a(^b|c)"); - valid_pattern ("a$(b|c)"); - valid_pattern ("(a)(^b|c)"); - valid_pattern ("(a)(b|^c)"); - valid_pattern ("(b$|c)(a)"); - valid_pattern ("(b|c$)(a)"); - valid_pattern ("(a(^b|c))"); - valid_pattern ("(a(b|^c))"); - valid_pattern ("((b$|c)a)"); - valid_pattern ("((b|c$)a)"); - valid_pattern ("((^a|^b)^c)"); - valid_pattern ("(c$(a$|b$))"); - valid_pattern ("((^a|^b)^c)"); - valid_pattern ("((a$|b$)c)"); - valid_pattern ("(c$(a$|b$))"); - valid_pattern ("((^a|^b)|^c)^d"); - valid_pattern ("((a$|b$)|c$)d$"); - valid_pattern ("d$(c$|(a$|b$))"); - valid_pattern ("((^a|^b)|^c)(^d)"); - valid_pattern ("((a$|b$)|c$)(d$)"); - valid_pattern ("(d$)((a$|b$)|c$)"); - valid_pattern ("((^a|^b)|^c)((^d))"); - valid_pattern ("((a$|b$)|c$)((d$))"); - valid_pattern ("((d$))((a$|b$)|c$)"); - valid_pattern ("(((^a|^b))c|^d)^e"); - valid_pattern ("(((a$|b$))c|d$)$e$"); - valid_pattern ("e$(d$|c((a$|b$)))"); - valid_pattern ("(^a)((^b))"); - valid_pattern ("(a$)((b$))"); - valid_pattern ("((^a))(^b)"); - valid_pattern ("((a$))(b$)"); - valid_pattern ("((^a))((^b))"); - valid_pattern ("((a$))((b$))"); - valid_pattern ("((^a)^b)"); - valid_pattern ("((a$)b$)"); - valid_pattern ("(b$(a$))"); - valid_pattern ("(((^a)b)^c)"); - valid_pattern ("(((a$)b)c$)"); - valid_pattern ("(c$(b(a$)))"); - valid_pattern 
("(((^a)b)c)^d"); - valid_pattern ("(((a$)b)c)d$"); - valid_pattern ("d$(c(b(a$)))"); - valid_pattern (".^a"); - valid_pattern ("a$."); - valid_pattern ("[a]^b"); - valid_pattern ("b$[a]"); - valid_pattern ("\\(a$\\)"); - valid_pattern ("a$\\|b"); - valid_pattern ("(^a|^b)^c"); - valid_pattern ("c$(a$|b$)"); - valid_pattern ("(^a|^b)^|^c"); - valid_pattern ("(a$|b$)$|$c$"); - valid_pattern ("(a$|$b$)$|c$"); - valid_pattern ("($a$|b$)$|c$"); - valid_pattern ("$(a$|b$)$|c$"); - valid_pattern ("^c|d(^a|^b)"); - valid_pattern ("(^a|^b)|d^c"); - valid_pattern ("c$|(a$|b$)d"); - valid_pattern ("c$d|(a$|b$)"); - valid_pattern ("c(^a|^b)|^d"); - valid_pattern ("(a$|b$)c|d$"); - valid_pattern ("c(((^a|^b))|^d)e"); - valid_pattern ("(c((^a|^b))|^d)e"); - valid_pattern ("((c(^a|^b))|^d)e"); - valid_pattern ("(((^a|^b))|c^d)e"); - valid_pattern ("(((^a|^b))|^d)^e"); - valid_pattern ("(c$((a|b))|d)e$"); - valid_pattern ("(c((a$|b$))|d)e$"); - valid_pattern ("(c((a|b)$)|d)e$"); - valid_pattern ("(c((a|b))|d$)e$"); - valid_pattern ("^d(^c|e((a|b)))"); - valid_pattern ("^d(c|^e((a|b)))"); - valid_pattern ("^d(c|e(^(a|b)))"); - valid_pattern ("^d(c|e((^a|b)))"); - valid_pattern ("^d(c|e((a|^b)))"); - valid_pattern ("^d(c|e((a|b^)))"); - valid_pattern ("^d(c|e((a|b)^))"); - valid_pattern ("^d(c|e((a|b))^)"); - valid_pattern ("^d(c|e((a|b)))^"); - valid_pattern ("d$(c$|e((a$|b$)))"); - valid_pattern ("d(c$|e$((a$|b$)))"); - valid_pattern ("(((^a|^b))^c)|^de"); - valid_pattern ("(((^a|^b))c)|^d^e"); - valid_pattern ("(((a$|b))c$)|de$"); - valid_pattern ("(((a|b$))c$)|de$"); - valid_pattern ("(((a|b))c$)|d$e$"); - valid_pattern ("^d^e|^(c((a|b)))"); - valid_pattern ("^de|^(c^((a|b)))"); - valid_pattern ("^de|^(c(^(a|b)))"); - valid_pattern ("^de|^(c((^a|b)))"); - valid_pattern ("^de|^(c((a|^b)))"); - valid_pattern ("^de|(^c(^(a|b)))"); - valid_pattern ("^de|(^c((^a|b)))"); - valid_pattern ("^de|(^c((a|^b)))"); - valid_pattern ("de$|(c($(a|b)$))"); - valid_pattern ("de$|(c$((a|b)$))"); 
- valid_pattern ("de$|($c((a|b)$))"); - valid_pattern ("de$|$(c((a|b)$))"); - valid_pattern ("de$|(c($(a|b))$)"); - valid_pattern ("de$|(c$((a|b))$)"); - valid_pattern ("de$|$(c((a|b))$)"); - valid_pattern ("de$|(c($(a|b)))$"); - valid_pattern ("de$|(c$((a|b)))$"); - valid_pattern ("de$|($c((a|b)))$"); - valid_pattern ("de$|$(c((a|b)))$"); - valid_pattern ("^a(^b|c)|^d"); - valid_pattern ("^a(b|^c)|^d"); - valid_pattern ("^a(b|c^)|^d"); - valid_pattern ("^a(b|c)^|^d"); - valid_pattern ("a$(b$|c$)|d$"); - valid_pattern ("^d|^a(^b|c)"); - valid_pattern ("^d|^a(b|^c)"); - valid_pattern ("d$|a$(b$|c$)"); - valid_pattern ("^d|^(b|c)^a"); - valid_pattern ("d$|(b|c$)a$"); - valid_pattern ("d$|(b$|c)a$"); - valid_pattern ("^(a)^(b|c)|^d"); - valid_pattern ("^(a)(^b|c)|^d"); - valid_pattern ("^(a)(b|^c)|^d"); - valid_pattern ("(a)$(b|c)$|d$"); - valid_pattern ("(a$)(b|c)$|d$"); - valid_pattern ("(^a)(^b|c)|^d"); - valid_pattern ("(^a)(b|^c)|^d"); - valid_pattern ("(a)$(b$|c$)|d$"); - valid_pattern ("(a$)(b$|c$)|d$"); - valid_pattern ("^d|^(b|c)^(a)"); - valid_pattern ("^d|^(b|c)(^a)"); - valid_pattern ("d$|(b|c$)(a)$"); - valid_pattern ("d$|(b$|c)(a)$"); - valid_pattern ("^d|(^b|^c)^(a)"); - valid_pattern ("^d|(^b|^c)(^a)"); - valid_pattern ("d$|(b|c)$(a$)"); - valid_pattern ("d$|(b|c$)(a$)"); - valid_pattern ("d$|(b$|c)(a$)"); - valid_pattern ("^d|^(a)^(b|c)"); - valid_pattern ("^d|^(a)(^b|c)"); - valid_pattern ("^d|^(a)(b|^c)"); - valid_pattern ("^d|(^a)^(b|c)"); - valid_pattern ("^d|(^a)(^b|c)"); - valid_pattern ("^d|(^a)(b|^c)"); - valid_pattern ("d$|(a)$(b$|c$)"); - valid_pattern ("d$|(a$)(b$|c$)"); - valid_pattern ("((e^a|^b)|^c)|^d"); - valid_pattern ("((^a|e^b)|^c)|^d"); - valid_pattern ("((^a|^b)|e^c)|^d"); - valid_pattern ("((^a|^b)|^c)|e^d"); - valid_pattern ("d$e|(c$|(a$|b$))"); - valid_pattern ("d$|(c$e|(a$|b$))"); - valid_pattern ("d$|(c$|(a$e|b$))"); - valid_pattern ("d$|(c$|(a$|b$e))"); - valid_pattern ("d$|(c$|(a$|b$)e)"); - valid_pattern 
("d$|(c$|(a$|b$))e"); - valid_pattern ("(a|b)^|c"); - valid_pattern ("(a|b)|c^"); - valid_pattern ("$(a|b)|c"); - valid_pattern ("(a|b)|$c"); - valid_pattern ("(a^|^b)|^c"); - valid_pattern ("(^a|b^)|^c"); - valid_pattern ("(^a|^b)|c^"); - valid_pattern ("($a|b$)|c$"); - valid_pattern ("(a$|$b)|c$"); - valid_pattern ("(a$|b$)|$c"); - valid_pattern ("c^|(^a|^b)"); - valid_pattern ("^c|(a^|^b)"); - valid_pattern ("^c|(^a|b^)"); - valid_pattern ("$c|(a$|b$)"); - valid_pattern ("c$|($a|b$)"); - valid_pattern ("c$|(a$|$b)"); - valid_pattern ("c^|^(a|b)"); - valid_pattern ("^c|(a|b)^"); - valid_pattern ("$c|(a|b)$"); - valid_pattern ("c$|$(a|b)"); - valid_pattern ("(a^|^b)c|^d"); - valid_pattern ("(^a|b^)c|^d"); - valid_pattern ("(^a|^b)c|d^"); - valid_pattern ("(^a|^b)^c|^d"); - valid_pattern ("(a|b)c$|$d"); - valid_pattern ("(a|b)$c$|d$"); - valid_pattern ("(a|b)$c$|d$"); - valid_pattern ("(a|b$)c$|d$"); - valid_pattern ("(a$|b)c$|d$"); - valid_pattern ("($a|b)c$|d$"); - valid_pattern ("$(a|b)c$|d$"); - valid_pattern ("^d|^c^(a|b)"); - valid_pattern ("^d|^c(^a|b)"); - valid_pattern ("^d|^c(a|^b)"); - valid_pattern ("^d|^c(a|b^)"); - valid_pattern ("^d|^c(a|b)^"); - valid_pattern ("$d|c(a$|b$)"); - valid_pattern ("d$|c($a$|b$)"); - valid_pattern ("d$|c$(a$|b$)"); - valid_pattern ("d$|$c(a$|b$)"); - - valid_pattern ("(((a^|^b))c|^d)e"); - valid_pattern ("(((^a|b^))c|^d)e"); - valid_pattern ("(((^a|^b))^c|^d)e"); - valid_pattern ("((^(a|b))c|d^)e"); - valid_pattern ("(^((a|b))c|^d)^e"); - valid_pattern ("(^((a|b)^)c|^d)e"); - valid_pattern ("(^((a^|b))c|^d)e"); - valid_pattern ("(^((a|b^))c|^d)e"); - valid_pattern ("(^((a|b)^)c|^d)e"); - valid_pattern ("(^((a|b))^c|^d)e"); - valid_pattern ("(^((a|b))c^|^d)e"); - valid_pattern ("(^((a|b))c|^d^)e"); - valid_pattern ("(^((a|b))c|^d)^e"); - valid_pattern ("(((a|b))c|d)$e$"); - valid_pattern ("(((a|b))c|d$)e$"); - valid_pattern ("(((a|b))c|$d)e$"); - valid_pattern ("(((a|b))c$|d)e$"); - valid_pattern ("(((a|b))$c|d)e$"); - 
valid_pattern ("(((a|b)$)c|d)e$"); - valid_pattern ("(((a|b$))c|d)e$"); - valid_pattern ("(((a$|b))c|d)e$"); - valid_pattern ("((($a|b))c|d)e$"); - valid_pattern ("(($(a|b))c|d)e$"); - valid_pattern ("($((a|b))c|d)e$"); - valid_pattern ("$(((a|b))c|d)e$"); - valid_pattern ("(^((a|b)^)c|^d)e"); - valid_pattern ("(^((a|b))^c|^d)e"); - valid_pattern ("(^((a|b))c|^d^)e"); - valid_pattern ("(^((a|b))c|^d)^e"); - - valid_pattern ("^e(^d|c((a|b)))"); - valid_pattern ("^e(d|^c((a|b)))"); - valid_pattern ("^e(d|c^((a|b)))"); - valid_pattern ("^e(d|c(^(a|b)))"); - valid_pattern ("^e(d|c((^a|b)))"); - valid_pattern ("^e(d|c((a|^b)))"); - valid_pattern ("^e(d|c((a|b^)))"); - valid_pattern ("^e(d|c((a|b)^))"); - valid_pattern ("^e(d|c((a|b))^)"); - valid_pattern ("^e(d|c((a|b)))^"); - valid_pattern ("e$(d$|c((a$|b$)))"); - valid_pattern ("e(d$|c$((a$|b$)))"); - valid_pattern ("e(d$|c($(a$|b$)))"); - valid_pattern ("e(d$|c(($a$|b$)))"); - valid_pattern ("e$(d$|c((a|b)$))"); - valid_pattern ("e($d$|c((a|b)$))"); - valid_pattern ("e(d$|$c((a|b)$))"); - valid_pattern ("e(d$|c$((a|b)$))"); - valid_pattern ("e(d$|c($(a|b)$))"); - valid_pattern ("e(d$|c(($a|b)$))"); - valid_pattern ("e(d$|c((a|$b)$))"); - valid_pattern ("e(d$|c((a$|$b$)))"); - - valid_pattern ("e$(d$|c((a|b))$)"); - valid_pattern ("e($d$|c((a|b))$)"); - valid_pattern ("e(d$|$c((a|b))$)"); - valid_pattern ("e(d$|c$((a|b))$)"); - valid_pattern ("e(d$|c($(a|b))$)"); - valid_pattern ("e(d$|c(($a|b))$)"); - valid_pattern ("e(d$|c((a|$b))$)"); - valid_pattern ("e$(d$|c((a|b)))$"); - valid_pattern ("e($d$|c((a|b)))$"); - valid_pattern ("e(d$|$c((a|b)))$"); - valid_pattern ("e(d$|c$((a|b)))$"); - valid_pattern ("e(d$|c($(a|b)))$"); - valid_pattern ("e(d$|c(($a|b)))$"); - valid_pattern ("e(d$|c((a|$b)))$"); - valid_pattern ("(((^a|^b)^)c)|^de"); - valid_pattern ("(((^a|^b))^c)|^de"); - valid_pattern ("(((^a|^b))c)^|^de"); - valid_pattern ("$(((a|b))c$)|de$"); - valid_pattern ("($((a|b))c$)|de$"); - valid_pattern 
("(($(a|b))c$)|de$"); - valid_pattern ("((($a|b))c$)|de$"); - valid_pattern ("(((a|$b))c$)|de$"); - valid_pattern ("(((a|b)$)c$)|de$"); - valid_pattern ("(((a|b))$c$)|de$"); - valid_pattern ("$(((a|b))c)$|de$"); - valid_pattern ("($((a|b))c)$|de$"); - valid_pattern ("(($(a|b))c)$|de$"); - valid_pattern ("((($a|b))c)$|de$"); - valid_pattern ("(((a|$b))c)$|de$"); - valid_pattern ("(((a|b)$)c)$|de$"); - valid_pattern ("(((a|b))$c)$|de$"); - valid_pattern ("^ed|^(c((a|b)))^"); - valid_pattern ("^ed|^(c((a|b))^)"); - valid_pattern ("^ed|^(c((a|b)^))"); - valid_pattern ("^ed|^(c((a|b^)))"); - valid_pattern ("^ed|^(c((a^|b)))"); - valid_pattern ("^ed|^(c((^a|b)))"); - valid_pattern ("^ed|^(c(^(a|b)))"); - valid_pattern ("^ed|^(c^((a|b)))"); - valid_pattern ("^ed|(^c((a|b)))^"); - valid_pattern ("^ed|(^c((a|b))^)"); - valid_pattern ("^ed|(^c((a|b)^))"); - valid_pattern ("^ed|(^c((a|b^)))"); - valid_pattern ("^ed|(^c((a|^b)))"); - valid_pattern ("^ed|(^c((a^|b)))"); - valid_pattern ("^ed|(^c((^a|b)))"); - valid_pattern ("^ed|(^c(^(a|b)))"); - valid_pattern ("^ed|(^c(^(a|b)))"); - valid_pattern ("^ed|(^c^((a|b)))"); - valid_pattern ("ed$|$(c((a|b)))$"); - valid_pattern ("ed$|($c((a|b)))$"); - valid_pattern ("ed$|(c$((a|b)))$"); - valid_pattern ("ed$|(c($(a|b)))$"); - valid_pattern ("ed$|(c(($a|b)))$"); - valid_pattern ("ed$|(c((a|$b)))$"); - valid_pattern ("ed$|$(c((a|b))$)"); - valid_pattern ("ed$|($c((a|b))$)"); - valid_pattern ("ed$|(c$((a|b))$)"); - valid_pattern ("ed$|(c($(a|b))$)"); - valid_pattern ("ed$|(c(($a|b))$)"); - valid_pattern ("ed$|(c((a|$b))$)"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - 
valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a$|b$)))"); - valid_pattern ("ed$|($c((a$|b$)))"); - valid_pattern ("ed$|(c$((a$|b$)))"); - valid_pattern ("ed$|(c($(a$|b$)))"); - valid_pattern ("ed$|(c(($a$|b$)))"); - valid_pattern ("ed$|(c((a$|$b$)))"); - valid_pattern ("^a(b|c)^|^d"); - valid_pattern ("^a(b|c^)|^d"); - valid_pattern ("^a(b|^c)|^d"); - valid_pattern ("^a(b^|c)|^d"); - valid_pattern ("^a(^b|c)|^d"); - valid_pattern ("^a^(b|c)|^d"); - valid_pattern ("$a(b$|c$)|d$"); - valid_pattern ("a$(b$|c$)|d$"); - valid_pattern ("a($b$|c$)|d$"); - valid_pattern ("a(b$|$c$)|d$"); - valid_pattern ("a(b$|c$)|$d$"); - valid_pattern ("^(a^)(b|c)|^d"); - valid_pattern ("^(a)^(b|c)|^d"); - valid_pattern ("^(a)(^b|c)|^d"); - valid_pattern ("^(a)(b^|c)|^d"); - valid_pattern ("^(a)(b|^c)|^d"); - valid_pattern ("^(a)(b|c^)|^d"); - valid_pattern ("^(a)(b|c)^|^d"); - valid_pattern ("(^a^)(b|c)|^d"); - valid_pattern ("(^a)^(b|c)|^d"); - valid_pattern ("(^a)(^b|c)|^d"); - valid_pattern ("(^a)(b^|c)|^d"); - valid_pattern ("(^a)(b|^c)|^d"); - valid_pattern ("(^a)(b|c^)|^d"); - valid_pattern ("(^a)(b|c)^|^d"); - - valid_pattern ("(a)(b$|c$)d$"); - valid_pattern ("(a)(b|$c)$|d$"); - valid_pattern ("(a)($b|c)$|d$"); - valid_pattern ("(a)$(b|c)$|d$"); - valid_pattern ("(a$)(b|c)$|d$"); - 
valid_pattern ("($a)(b|c)$|d$"); - valid_pattern ("$(a)(b|c)$|d$"); - valid_pattern ("(b|c)($a)$|d$"); - valid_pattern ("(b|c)$(a)$|d$"); - valid_pattern ("(b|c$)(a)$|d$"); - valid_pattern ("(b|$c)(a)$|d$"); - valid_pattern ("(b$|c)(a)$|d$"); - valid_pattern ("($b|c)(a)$|d$"); - valid_pattern ("$(b|c)(a)$|d$"); - valid_pattern ("(b|c)($a$)|d$"); - valid_pattern ("(b|c)$(a$)|d$"); - valid_pattern ("(b|c$)(a$)|d$"); - valid_pattern ("(b|$c)(a$)|d$"); - valid_pattern ("(b$|c)(a$)|d$"); - valid_pattern ("($b|c)(a$)|d$"); - valid_pattern ("$(b|c)(a$)|d$"); - valid_pattern ("(a)$(b$|c$)|d$"); - valid_pattern ("(a$)(b$|c$)|d$"); - valid_pattern ("($a)(b$|c$)|d$"); - valid_pattern ("$(a)(b$|c$)|d$"); - valid_pattern ("^d|^(b^|c)(a)"); - valid_pattern ("^d|^(b|c^)(a)"); - valid_pattern ("^d|^(b|c)^(a)"); - valid_pattern ("^d|^(b|c)(^a)"); - valid_pattern ("^d|^(b|c)(a^)"); - valid_pattern ("^d|^(b|c)(a)^"); - valid_pattern ("^d|(^b|^c^)(a)"); - valid_pattern ("^d|(^b|^c)^(a)"); - valid_pattern ("^d|(^b|^c)(^a)"); - valid_pattern ("^d|(^b|^c)(a^)"); - valid_pattern ("^d|(^b|^c)(a)^"); - valid_pattern ("d$|(b|c)($a$)"); - valid_pattern ("d$|(b|c)$(a$)"); - valid_pattern ("d$|(b|c$)(a$)"); - valid_pattern ("d$|(b$|c)(a$)"); - valid_pattern ("d$|($b|c)(a$)"); - valid_pattern ("d$|$(b|c)(a$)"); - valid_pattern ("d$|(b|c)($a)$"); - valid_pattern ("d$|(b|c)$(a)$"); - valid_pattern ("d$|(b|c$)(a)$"); - valid_pattern ("d$|(b$|c)(a)$"); - valid_pattern ("d$|($b|c)(a)$"); - valid_pattern ("d$|$(b|c)(a)$"); - valid_pattern ("^d|^(a^)(b|c)"); - valid_pattern ("^d|^(a)^(b|c)"); - valid_pattern ("^d|^(a)(^b|c)"); - valid_pattern ("^d|^(a)(b^|c)"); - valid_pattern ("^d|^(a)(b|^c)"); - valid_pattern ("^d|^(a)(b|c^)"); - valid_pattern ("^d|^(a)(b|c)^"); - valid_pattern ("^d|(^a^)(b|c)"); - valid_pattern ("^d|(^a)^(b|c)"); - valid_pattern ("^d|(^a)(^b|c)"); - valid_pattern ("^d|(^a)(b^|c)"); - valid_pattern ("^d|(^a)(b|^c)"); - valid_pattern ("^d|(^a)(b|c^)"); - valid_pattern 
("^d|(^a)(b|c)^"); - valid_pattern ("d$|(a)$(b$|c$)"); - valid_pattern ("d$|(a$)(b$|c$)"); - valid_pattern ("d$|($a)(b$|c$)"); - valid_pattern ("d$|$(a)(b$|c$)"); - valid_pattern ("d$|(a)(b|$c)$"); - valid_pattern ("d$|(a)($b|c)$"); - valid_pattern ("d$|(a)$(b|c)$"); - valid_pattern ("d$|(a$)(b|c)$"); - valid_pattern ("d$|($a)(b|c)$"); - valid_pattern ("d$|$(a)(b|c)$"); - valid_pattern ("((^a|^b)|^c)|^d^"); - valid_pattern ("((^a|^b)|^c)^|^d"); - valid_pattern ("((^a|^b)|^c^)|^d"); - valid_pattern ("((^a|^b)^|^c)|^d"); - valid_pattern ("((^a|^b^)|^c)|^d"); - valid_pattern ("((^a^|^b)|^c)|^d"); - valid_pattern ("((a|b)|c)|$d$"); - valid_pattern ("((a|b)|$c)|d$"); - valid_pattern ("((a|$b)|c)|d$"); - valid_pattern ("(($a|b)|c)|d$"); - valid_pattern ("($(a|b)|c)|d$"); - valid_pattern ("$((a|b)|c)|d$"); - valid_pattern ("^d^|(c|(a|b))"); - valid_pattern ("^d|(c^|(a|b))"); - valid_pattern ("^d|(c|(a^|b))"); - valid_pattern ("^d|(c|(a|b^))"); - valid_pattern ("^d|(c|(a|b)^)"); - valid_pattern ("^d|(c|(a|b))^"); - valid_pattern ("d$|(c$|(a$|$b$))"); - valid_pattern ("d$|(c$|($a$|b$))"); - valid_pattern ("d$|($c$|(a$|b$))"); - valid_pattern ("d$|$(c$|(a$|b$))"); - valid_pattern ("$d$|(c$|(a$|b$))"); - valid_pattern ("d$|(c$|(a|$b)$)"); - valid_pattern ("d$|(c$|($a|b)$)"); - valid_pattern ("d$|($c$|(a|b)$)"); - valid_pattern ("d$|$(c$|(a|b)$)"); - valid_pattern ("$d$|(c$|(a|b)$)"); - valid_pattern ("d$|(c$|(a|$b))$"); - valid_pattern ("d$|(c$|($a|b))$"); - valid_pattern ("d$|($c$|(a|b))$"); - valid_pattern ("d$|$(c$|(a|b))$"); - valid_pattern ("$d$|(c$|(a|b))$"); - valid_pattern ("^c^|(^a|^b)"); - valid_pattern ("^c|(^a^|^b)"); - valid_pattern ("^c|(^a|^b^)"); - valid_pattern ("^c|(^a|^b)^"); - valid_pattern ("c$|(a$|$b$)"); - valid_pattern ("c$|($a$|b$)"); - valid_pattern ("c$|$(a$|b$)"); - valid_pattern ("$c$|(a$|b$)"); - valid_pattern ("^d^(c|e((a|b)))"); - valid_pattern ("^d(^c|e((a|b)))"); - valid_pattern ("^d(c^|e((a|b)))"); - valid_pattern ("^d(c|^e((a|b)))"); - 
valid_pattern ("^d(c|e^((a|b)))"); - valid_pattern ("^d(c|e(^(a|b)))"); - valid_pattern ("^d(c|e((^a|b)))"); - valid_pattern ("^d(c|e((a|^b)))"); - valid_pattern ("^d(c|e((a|b^)))"); - valid_pattern ("^d(c|e((a|b)^))"); - valid_pattern ("^d(c|e((a|b))^)"); - valid_pattern ("^d(c|e((a|b)))^"); - valid_pattern ("d(c$|e($(a$|b$)))"); - valid_pattern ("d(c$|e$((a$|b$)))"); - valid_pattern ("d(c$|$e((a$|b$)))"); - valid_pattern ("d($c$|e((a$|b$)))"); - valid_pattern ("d$(c$|e((a$|b$)))"); - valid_pattern ("$d(c$|e((a$|b$)))"); - valid_pattern ("^d|^a^(b|c)"); - valid_pattern ("^d|^a(^b|c)"); - valid_pattern ("^d|^a(b^|c)"); - valid_pattern ("^d|^a(b|^c)"); - valid_pattern ("^d|^a(b|c^)"); - valid_pattern ("^d|^a(b|c)^"); - valid_pattern ("d$|a($b$|c$)"); - valid_pattern ("d$|a$(b$|c$)"); - valid_pattern ("d$|$a(b$|c$)"); - valid_pattern ("$d$|a(b$|c$)"); - valid_pattern ("^d|^(b^|c)a"); - valid_pattern ("^d|^(b|c^)a"); - valid_pattern ("^d|^(b|c)^a"); - valid_pattern ("^d|^(b|c)a^"); - valid_pattern ("d$|(b|c)$a$"); - valid_pattern ("d$|(b|c$)a$"); - valid_pattern ("d$|(b|$c)a$"); - valid_pattern ("d$|(b$|c)a$"); - valid_pattern ("d$|($b|c)a$"); - valid_pattern ("d$|$(b|c)a$"); - valid_pattern ("$d$|(b|c)a$"); - - /* xx Do these use all the valid_nonposix_pattern ones in other_test.c? 
*/ - - TEST_SEARCH ("(^a|^b)c", "ac", 0, 2); - TEST_SEARCH ("(^a|^b)c", "bc", 0, 2); - TEST_SEARCH ("c(a$|b$)", "ca", 0, 2); - TEST_SEARCH ("c(a$|b$)", "cb", 0, 2); - TEST_SEARCH ("^(a|b)|^c", "ad", 0, 2); - TEST_SEARCH ("^(a|b)|^c", "bd", 0, 2); - TEST_SEARCH ("(a|b)$|c$", "da", 0, 2); - TEST_SEARCH ("(a|b)$|c$", "db", 0, 2); - TEST_SEARCH ("(a|b)$|c$", "dc", 0, 2); - TEST_SEARCH ("(^a|^b)|^c", "ad", 0, 2); - TEST_SEARCH ("(^a|^b)|^c", "bd", 0, 2); - TEST_SEARCH ("(^a|^b)|^c", "cd", 0, 2); - TEST_SEARCH ("(a$|b$)|c$", "da", 0, 2); - TEST_SEARCH ("(a$|b$)|c$", "db", 0, 2); - TEST_SEARCH ("(a$|b$)|c$", "dc", 0, 2); - TEST_SEARCH ("^c|(^a|^b)", "ad", 0, 2); - TEST_SEARCH ("^c|(^a|^b)", "bd", 0, 2); - TEST_SEARCH ("^c|(^a|^b)", "cd", 0, 2); - TEST_SEARCH ("c$|(a$|b$)", "da", 0, 2); - TEST_SEARCH ("c$|(a$|b$)", "db", 0, 2); - TEST_SEARCH ("c$|(a$|b$)", "dc", 0, 2); - TEST_SEARCH ("^c|^(a|b)", "ad", 0, 2); - TEST_SEARCH ("^c|^(a|b)", "bd", 0, 2); - TEST_SEARCH ("^c|^(a|b)", "cd", 0, 2); - TEST_SEARCH ("c$|(a|b)$", "da", 0, 2); - TEST_SEARCH ("c$|(a|b)$", "db", 0, 2); - TEST_SEARCH ("c$|(a|b)$", "dc", 0, 2); - TEST_SEARCH ("(^a|^b)c|^d", "ace", 0, 3); - TEST_SEARCH ("(^a|^b)c|^d", "bce", 0, 3); - TEST_SEARCH ("(^a|^b)c|^d", "de", 0, 2); - TEST_SEARCH ("(a|b)c$|d$", "eac", 0, 3); - TEST_SEARCH ("(a|b)c$|d$", "ebc", 0, 3); - TEST_SEARCH ("(a|b)c$|d$", "ed", 0, 3); - TEST_SEARCH ("^d|^c(a|b)", "cae", 0, 3); - TEST_SEARCH ("^d|^c(a|b)", "cbe", 0, 3); - TEST_SEARCH ("^d|^c(a|b)", "de", 0, 3); - TEST_SEARCH ("d$|c(a$|b$)", "eca", 0, 3); - TEST_SEARCH ("d$|c(a$|b$)", "ecb", 0, 3); - TEST_SEARCH ("d$|c(a$|b$)", "ed", 0, 3); - - TEST_SEARCH ("(((^a|^b))c|^d)e", "acef", 0, 4); - TEST_SEARCH ("(((^a|^b))c|^d)e", "bcef", 0, 4); - TEST_SEARCH ("(((^a|^b))c|^d)e", "def", 0, 3); - - TEST_SEARCH ("((^(a|b))c|^d)e", "acef", 0, 4); - TEST_SEARCH ("((^(a|b))c|^d)e", "bcef", 0, 4); - TEST_SEARCH ("((^(a|b))c|^d)e", "def", 0, 3); - - TEST_SEARCH ("(^((a|b))c|^d)e", "acef", 0, 4); - 
TEST_SEARCH ("(^((a|b))c|^d)e", "bcef", 0, 4); - TEST_SEARCH ("(^((a|b))c|^d)e", "def", 0, 3); - - TEST_SEARCH ("(((a|b))c|d)e$", "face", 0, 4); - TEST_SEARCH ("(((a|b))c|d)e$", "fbce", 0, 4); - TEST_SEARCH ("(((a|b))c|d)e$", "fde", 0, 3); - - TEST_SEARCH ("^e(d|c((a|b)))", "edf", 0, 3); - TEST_SEARCH ("^e(d|c((a|b)))", "ecaf", 0, 4); - TEST_SEARCH ("^e(d|c((a|b)))", "ecbf", 0, 4); - - TEST_SEARCH ("e(d$|c((a$|b$)))", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a$|b$)))", "feca", 0, 4); - TEST_SEARCH ("e(d$|c((a$|b$)))", "fecb", 0, 4); - - TEST_SEARCH ("e(d$|c((a|b)$))", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a|b)$))", "feca", 0, 4); - TEST_SEARCH ("e(d$|c((a|b)$))", "fecb", 0, 4); - - TEST_SEARCH ("e(d$|c((a|b))$)", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a|b))$)", "feca", 0, 3); - TEST_SEARCH ("e(d$|c((a|b))$)", "fecb", 0, 3); - - TEST_SEARCH ("e(d$|c((a|b)))$", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a|b)))$", "feca", 0, 3); - TEST_SEARCH ("e(d$|c((a|b)))$", "fecb", 0, 3); - - TEST_SEARCH ("(((^a|^b))c)|^de", "acf", 0, 3); - TEST_SEARCH ("(((^a|^b))c)|^de", "bcf", 0, 3); - TEST_SEARCH ("(((^a|^b))c)|^de", "def", 0, 3); - - TEST_SEARCH ("(((a|b))c$)|de$", "fac", 0, 3); - TEST_SEARCH ("(((a|b))c$)|de$", "fbc", 0, 3); - TEST_SEARCH ("(((a|b))c$)|de$", "fde", 0, 3); - - TEST_SEARCH ("(((a|b))c)$|de$", "fac", 0, 3); - TEST_SEARCH ("(((a|b))c)$|de$", "fbc", 0, 3); - TEST_SEARCH ("(((a|b))c)$|de$", "fde", 0, 3); - - TEST_SEARCH ("^ed|^(c((a|b)))", "edf", 0, 3); - TEST_SEARCH ("^ed|^(c((a|b)))", "caf", 0, 3); - TEST_SEARCH ("^ed|^(c((a|b)))", "cbf", 0, 3); - - TEST_SEARCH ("^ed|(^c((a|b)))", "edf", 0, 3); - TEST_SEARCH ("^ed|(^c((a|b)))", "caf", 0, 3); - TEST_SEARCH ("^ed|(^c((a|b)))", "cbf", 0, 3); - - TEST_SEARCH ("ed$|(c((a|b)))$", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)))$", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)))$", "fcb", 0, 3); - - TEST_SEARCH ("ed$|(c((a|b))$)", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a|b))$)", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a|b))$)", "fcb", 
0, 3); - - TEST_SEARCH ("ed$|(c((a|b)$))", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)$))", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)$))", "fcb", 0, 3); - - TEST_SEARCH ("ed$|(c((a$|b$)))", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a$|b$)))", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a$|b$)))", "fcb", 0, 3); - - TEST_SEARCH ("^a(b|c)|^d", "abe", 0, 3); - TEST_SEARCH ("^a(b|c)|^d", "ace", 0, 3); - TEST_SEARCH ("^a(b|c)|^d", "df", 0, 2); - - TEST_SEARCH ("a(b$|c$)|d$", "fab", 0, 3); - TEST_SEARCH ("a(b$|c$)|d$", "fac", 0, 3); - TEST_SEARCH ("a(b$|c$)|d$", "fd", 0, 2); - - TEST_SEARCH ("^(a)(b|c)|^d", "abe", 0, 3); - TEST_SEARCH ("^(a)(b|c)|^d", "ace", 0, 3); - TEST_SEARCH ("^(a)(b|c)|^d", "df", 0, 2); - - TEST_SEARCH ("(^a)(b|c)|^d", "abe", 0, 3); - TEST_SEARCH ("(^a)(b|c)|^d", "ace", 0, 3); - TEST_SEARCH ("(^a)(b|c)|^d", "df", 0, 2); - - TEST_SEARCH ("(a)(b|c)$|d$", "fab", 0, 3); - TEST_SEARCH ("(a)(b|c)$|d$", "fac", 0, 3); - TEST_SEARCH ("(a)(b|c)$|d$", "fd", 0, 2); - - TEST_SEARCH ("(b|c)(a)$|d$", "fba", 0, 3); - TEST_SEARCH ("(b|c)(a)$|d$", "fca", 0, 3); - TEST_SEARCH ("(b|c)(a)$|d$", "fd", 0, 2); - - TEST_SEARCH ("(b|c)(a$)|d$", "fba", 0, 3); - TEST_SEARCH ("(b|c)(a$)|d$", "fca", 0, 3); - TEST_SEARCH ("(b|c)(a$)|d$", "fd", 0, 2); - - TEST_SEARCH ("(a)(b$|c$)|d$", "fab", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "fac", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "fd", 0, 2); - - TEST_SEARCH ("^d|^(b|c)(a)", "df", 0, 2); - TEST_SEARCH ("^d|^(b|c)(a)", "baf", 0, 3); - TEST_SEARCH ("^d|^(b|c)(a)", "caf", 0, 3); - - TEST_SEARCH ("^d|(^b|^c)(a)", "df", 0, 2); - TEST_SEARCH ("^d|(^b|^c)(a)", "baf", 0, 3); - TEST_SEARCH ("^d|(^b|^c)(a)", "caf", 0, 3); - - TEST_SEARCH ("d$|(b|c)(a$)", "fd", 0, 2); - TEST_SEARCH ("d$|(b|c)(a$)", "fba", 0, 3); - TEST_SEARCH ("d$|(b|c)(a$)", "fca", 0, 3); - - TEST_SEARCH ("d$|(b|c)(a)$", "fd", 0, 2); - TEST_SEARCH ("d$|(b|c)(a)$", "fba", 0, 3); - TEST_SEARCH ("d$|(b|c)(a)$", "fca", 0, 3); - - TEST_SEARCH ("d$|(b|c)(a$)", "fd", 0, 2); - TEST_SEARCH 
("d$|(b|c)(a$)", "fba", 0, 3); - TEST_SEARCH ("d$|(b|c)(a$)", "fca", 0, 3); - - TEST_SEARCH ("^d|^(a)(b|c)", "df", 0, 2); - TEST_SEARCH ("^d|^(a)(b|c)", "abf", 0, 3); - TEST_SEARCH ("^d|^(a)(b|c)", "acf", 0, 3); - - TEST_SEARCH ("^d|(^a)(b|c)", "df", 0, 2); - TEST_SEARCH ("^d|(^a)(b|c)", "abf", 0, 3); - TEST_SEARCH ("^d|(^a)(b|c)", "acf", 0, 3); - - TEST_SEARCH ("d$|(a)(b$|c$)", "fd", 0, 2); - TEST_SEARCH ("d$|(a)(b$|c$)", "fab", 0, 3); - TEST_SEARCH ("d$|(a)(b$|c$)", "fac", 0, 3); - - TEST_SEARCH ("d$|(a)(b|c)$", "fd", 0, 2); - TEST_SEARCH ("d$|(a)(b|c)$", "fab", 0, 3); - TEST_SEARCH ("d$|(a)(b|c)$", "fac", 0, 3); - - TEST_SEARCH ("((^a|^b)|^c)|^d", "ae", 0, 2); - TEST_SEARCH ("((^a|^b)|^c)|^d", "be", 0, 2); - TEST_SEARCH ("((^a|^b)|^c)|^d", "ce", 0, 2); - TEST_SEARCH ("((^a|^b)|^c)|^d", "de", 0, 2); - - TEST_SEARCH ("((a|b)|c)|d$", "ed", 0, 2); - TEST_SEARCH ("((a|b)|c)|d$", "ea", 0, 2); - TEST_SEARCH ("((a|b)|c)|d$", "eb", 0, 2); - TEST_SEARCH ("((a|b)|c)|d$", "ec", 0, 2); - - TEST_SEARCH ("^d|(c|(a|b))", "de", 0, 2); - - TEST_SEARCH ("d$|(c$|(a$|b$))", "ed", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|b$))", "ec", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|b$))", "ea", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|b$))", "eb", 0, 2); - - TEST_SEARCH ("d$|(c$|(a|b)$)", "ed", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b)$)", "ec", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b)$)", "ea", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b)$)", "eb", 0, 2); - - TEST_SEARCH ("d$|(c$|(a|b))$", "ed", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b))$", "ec", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b))$", "ea", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b))$", "eb", 0, 2); - - test_match ("a|^b", "b"); - test_match ("a|b$", "b"); - test_match ("^b|a", "b"); - test_match ("b$|a", "b"); - test_match ("(^a)", "a"); - test_match ("(a$)", "a"); - TEST_SEARCH ("c|^ab", "aba", 0, 3); - TEST_SEARCH ("c|ba$", "aba", 0, 3); - TEST_SEARCH ("^ab|c", "aba", 0, 3); - TEST_SEARCH ("ba$|c", "aba", 0, 3); - TEST_SEARCH ("(^a)", "ab", 0, 2); - TEST_SEARCH ("(a$)", "ba", 0, 2); - - 
TEST_SEARCH ("(^a$)", "a", 0, 1); - TEST_SEARCH ("(^a)", "ab", 0, 2); - TEST_SEARCH ("(b$)", "ab", 0, 2); - - /* Backtracking. */ - /* Per POSIX D11.1 p. 108, leftmost longest match. */ - test_match ("(wee|week)(knights|night)", "weeknights"); - - test_match ("(fooq|foo)qbar", "fooqbar"); - test_match ("(fooq|foo)(qbarx|bar)", "fooqbarx"); - - /* Take first alternative that does the longest match. */ - test_all_registers ("(fooq|(foo)|(fo))((qbarx)|(oqbarx)|bar)", "fooqbarx", - "", 0, 8, 0, 3, 0, 3, -1, -1, 3, 8, 3, 8, -1, -1, -1, -1, -1, -1, - -1, -1); - - test_match ("(fooq|foo)*qbar", "fooqbar"); - test_match ("(fooq|foo)*(qbar)", "fooqbar"); - test_match ("(fooq|foo)*(qbar)*", "fooqbar"); - - test_match ("(fooq|fo|o)*qbar", "fooqbar"); - test_match ("(fooq|fo|o)*(qbar)", "fooqbar"); - test_match ("(fooq|fo|o)*(qbar)*", "fooqbar"); - - test_match ("(fooq|fo|o)*(qbar|q)*", "fooqbar"); - test_match ("(fooq|foo)*(qbarx|bar)", "fooqbarx"); - test_match ("(fooq|foo)*(qbarx|bar)*", "fooqbarx"); - - test_match ("(fooq|fo|o)+(qbar|q)+", "fooqbar"); - test_match ("(fooq|foo)+(qbarx|bar)", "fooqbarx"); - test_match ("(fooq|foo)+(qbarx|bar)+", "fooqbarx"); - - /* Per Mike Haertel. */ - test_match ("(foo|foobarfoo)(bar)*", "foobarfoo"); - - /* Combination. */ - test_match ("[ab]?c", "ac"); - test_match ("[ab]*c", "ac"); - test_match ("[ab]+c", "ac"); - test_match ("(a|b)?c", "ac"); - test_match ("(a|b)*c", "ac"); - test_match ("(a|b)+c", "ac"); - test_match ("(a*c)?b", "b"); - test_match ("(a*c)+b", "aacb"); - /* Registers. */ - /* Per David A. Willcox. */ - test_match ("a((b)|(c))d", "acd"); - test_all_registers ("a((b)|(c))d", "acd", "", 0, 3, 1, 2, -1, -1, 1, 2, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - - /* Extended regular expressions, continued; these don't match their strings. */ - test_should_match = false; - -#if 0 - /* Invalid use of special characters. 
*/ - /* These are not invalid anymore, since POSIX says the behavior is - undefined, and we prefer context-independent to context-invalid. */ - invalid_pattern (REG_BADRPT, "*"); - invalid_pattern (REG_BADRPT, "a|*"); - invalid_pattern (REG_BADRPT, "(*)"); - invalid_pattern (REG_BADRPT, "^*"); - invalid_pattern (REG_BADRPT, "+"); - invalid_pattern (REG_BADRPT, "a|+"); - invalid_pattern (REG_BADRPT, "(+)"); - invalid_pattern (REG_BADRPT, "^+"); - - invalid_pattern (REG_BADRPT, "?"); - invalid_pattern (REG_BADRPT, "a|?"); - invalid_pattern (REG_BADRPT, "(?)"); - invalid_pattern (REG_BADRPT, "^?"); - - invalid_pattern (REG_BADPAT, "|"); - invalid_pattern (REG_BADPAT, "a|"); - invalid_pattern (REG_BADPAT, "a||"); - invalid_pattern (REG_BADPAT, "(|a)"); - invalid_pattern (REG_BADPAT, "(a|)"); - - invalid_pattern (REG_BADPAT, PARENS_TO_OPS ("(|)")); - - invalid_pattern (REG_BADRPT, "{1}"); - invalid_pattern (REG_BADRPT, "a|{1}"); - invalid_pattern (REG_BADRPT, "^{1}"); - invalid_pattern (REG_BADRPT, "({1})"); - - invalid_pattern (REG_BADPAT, "|b"); - - invalid_pattern (REG_BADRPT, "^{0,}*"); - invalid_pattern (REG_BADRPT, "$*"); - invalid_pattern (REG_BADRPT, "${0,}*"); -#endif /* 0 */ - - invalid_pattern (REG_EESCAPE, "\\"); - - test_match ("a?b", "a"); - - - test_match ("a+", ""); - test_match ("a+b", "a"); - test_match ("a?", "b"); - -#if 0 - /* We make empty groups valid now, since they are undefined in POSIX. - (13 Sep 92) */ - /* Subexpressions. */ - invalid_pattern (REG_BADPAT, "()"); - invalid_pattern (REG_BADPAT, "a()"); - invalid_pattern (REG_BADPAT, "()b"); - invalid_pattern (REG_BADPAT, "a()b"); - invalid_pattern (REG_BADPAT, "()*"); - invalid_pattern (REG_BADPAT, "(()*"); -#endif - /* Invalid intervals. */ - test_match ("a{2}*", "aaa"); - test_match ("a{2}?", "aaa"); - test_match ("a{2}+", "aaa"); - test_match ("a{2}{2}", "aaa"); - test_match ("a{1}{1}{2}", "aaa"); - test_match ("a{1}{1}{2}", "a"); - /* Invalid alternation. 
*/ - test_match ("a|b", "c"); - - TEST_SEARCH ("c|^ba", "aba", 0, 3); - TEST_SEARCH ("c|ab$", "aba", 0, 3); - TEST_SEARCH ("^ba|c", "aba", 0, 3); - TEST_SEARCH ("ab$|c", "aba", 0, 3); - /* Invalid anchoring. */ - TEST_SEARCH ("(^a)", "ba", 0, 2); - TEST_SEARCH ("(b$)", "ba", 0, 2); - - printf ("\nFinished POSIX extended tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex-0.12/test/psx-generic.c b/regex-0.12/test/psx-generic.c @@ -1,336 +0,0 @@ -/* psx-generic.c: test POSIX re's independent of us using basic or - extended syntax. */ - -#include "test.h" - - -void -test_posix_generic () -{ - int omit_generic_tests = 0; /* reset in debugger to skip */ - - if (omit_generic_tests) - return; - /* Tests somewhat in the order of P1003.2. */ - - /* Both posix basic and extended; should match. */ - - printf ("\nStarting generic POSIX tests.\n"); - test_grouping (); - test_intervals (); - - test_should_match = true; - /* Ordinary characters. 
*/ - printf ("\nContinuing generic POSIX tests.\n"); - - MATCH_SELF (""); - test_fastmap ("", "", 0, 0); - test_fastmap_search ("", "", "", 0, 0, 2, 0, 0); - TEST_REGISTERS ("", "", 0, 0, -1, -1, -1, -1); - TEST_SEARCH ("", "", 0, 0); - TEST_SEARCH_2 ("", "", "", 0, 1, 0); - - MATCH_SELF ("abc"); - test_fastmap ("abc", "a", 0, 0); - TEST_REGISTERS ("abc", "abc", 0, 3, -1, -1, -1, -1); - TEST_REGISTERS ("abc", "xabcx", 1, 4, -1, -1, -1, -1); - - test_match ("\\a","a"); - test_match ("\\0", "0"); - - TEST_SEARCH ("a", "ab", 0, 2); - TEST_SEARCH ("b", "ab", 0, 2); - TEST_SEARCH ("a", "ab", 1, -2); - TEST_SEARCH_2 ("a", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("b", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("a", "a", "b", 1, -2, 2); - - test_match ("\n", "\n"); - test_match ("a\n", "a\n"); - test_match ("\nb", "\nb"); - test_match ("a\nb", "a\nb"); - - TEST_SEARCH ("b", "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 236, -237); - /* Valid use of special characters. */ - test_match ("a*", "aa"); - test_fastmap ("a*", "a", 0, 0); - TEST_REGISTERS ("a*", "aa", 0, 2, -1, -1, -1, -1); - - test_match ("a*b", "aab"); - test_fastmap ("a*b", "ab", 0, 0); - - test_match ("a*ab", "aab"); - TEST_REGISTERS ("a*a", "aa", 0, 2, -1, -1, -1, -1); - TEST_REGISTERS ("a*a", "xaax", 1, 3, -1, -1, -1, -1); - - test_match ("\\{", "{"); - test_match ("\\^", "^"); - test_match ("\\.", "."); - test_match ("\\*", "*"); - test_match ("\\[", "["); - test_match ("\\$", "$"); - test_match ("\\\\", "\\"); - - test_match ("ab*", "a"); - test_match ("ab*", "abb"); - - /* Valid consecutive repetitions. */ - test_match ("a**", "a"); - /* Valid period. 
*/ - test_match (".", "a"); - TEST_REGISTERS (".", "a", 0, 1, -1, -1, -1, -1); - test_match (".", "\004"); - test_match (".", "\n"); - /* Valid bracket expressions. */ - test_match ("[ab]", "a"); - test_match ("[ab]", "b"); - test_fastmap ("[ab]", "ab", 0, 0); - TEST_REGISTERS ("[ab]", "a", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS ("[ab]", "xax", 1, 2, -1, -1, -1, -1); - - test_fastmap ("[^ab]", "ab", 1, 1); - test_match ("[^ab]", "c"); - test_match ("[^a]", "\n"); - - test_match ("[a]*a", "aa"); - - test_match ("[[]", "["); - test_match ("[]]", "]"); - test_match ("[.]", "."); - test_match ("[*]", "*"); - test_match ("[\\]", "\\"); - test_match ("[\\(]", "("); - test_match ("[\\)]", ")"); - test_match ("[^]]", "a"); - test_match ("[a^]", "^"); - test_match ("[a$]", "$"); - test_match ("[]a]", "]"); - test_match ("[a][]]", "a]"); - test_match ("[\n]", "\n"); - test_match ("[^a]", "\n"); - test_match ("[a-]", "a"); - - TEST_REGISTERS ("\\`[ \t\n]*", " karl (Karl Berry)", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS ("[ \t\n]*\\'", " karl (Karl Berry)", 18, 18, -1, -1, -1, -1); - - /* Collating, noncollating, - equivalence classes aren't - implemented yet. */ - - - /* Character classes. 
*/ - test_match ("[:alpha:]", "p"); - test_match ("[[:alpha:]]", "a"); - test_match ("[[:alpha:]]", "z"); - test_match ("[[:alpha:]]", "A"); - test_match ("[[:alpha:]]", "Z"); - test_match ("[[:upper:]]", "A"); - test_match ("[[:upper:]]", "Z"); - test_match ("[[:lower:]]", "a"); - test_match ("[[:lower:]]", "z"); - - test_match ("[[:digit:]]", "0"); - test_match ("[[:digit:]]", "9"); - test_fastmap ("[[:digit:]]", "0123456789", 0, 0); - - test_match ("[[:alnum:]]", "0"); - test_match ("[[:alnum:]]", "9"); - test_match ("[[:alnum:]]", "a"); - test_match ("[[:alnum:]]", "z"); - test_match ("[[:alnum:]]", "A"); - test_match ("[[:alnum:]]", "Z"); - test_match ("[[:xdigit:]]", "0"); - test_match ("[[:xdigit:]]", "9"); - test_match ("[[:xdigit:]]", "A"); - test_match ("[[:xdigit:]]", "F"); - test_match ("[[:xdigit:]]", "a"); - test_match ("[[:xdigit:]]", "f"); - test_match ("[[:space:]]", " "); - test_match ("[[:print:]]", " "); - test_match ("[[:print:]]", "~"); - test_match ("[[:punct:]]", ","); - test_match ("[[:graph:]]", "!"); - test_match ("[[:graph:]]", "~"); - test_match ("[[:cntrl:]]", "\177"); - test_match ("[[:digit:]a]", "a"); - test_match ("[[:digit:]a]", "2"); - test_match ("[a[:digit:]]", "a"); - test_match ("[a[:digit:]]", "2"); - test_match ("[[:]", "["); - test_match ("[:]", ":"); - test_match ("[[:a]", "["); - test_match ("[[:alpha:a]", "["); - /* Valid ranges. */ - test_match ("[a-a]", "a"); - test_fastmap ("[a-a]", "a", 0, 0); - TEST_REGISTERS ("[a-a]", "xax", 1, 2, -1, -1, -1, -1); - - test_match ("[a-z]", "z"); - test_fastmap ("[a-z]", "abcdefghijklmnopqrstuvwxyz", 0, 0); - test_match ("[-a]", "-"); /* First */ - test_match ("[-a]", "a"); - test_match ("[a-]", "-"); /* Last */ - test_match ("[a-]", "a"); - test_match ("[--@]", "@"); /* First and starting point. */ - - test_match ("[%--a]", "%"); /* Ending point. */ - test_match ("[%--a]", "-"); /* Ditto. */ - - test_match ("[a%--]", "%"); /* Both ending point and last. 
*/ - test_match ("[a%--]", "-"); - test_match ("[%--a]", "a"); /* Ending point only. */ - test_match ("[a-c-f]", "e"); /* Piggyback. */ - - test_match ("[)-+--/]", "*"); - test_match ("[)-+--/]", ","); - test_match ("[)-+--/]", "/"); - test_match ("[[:digit:]-]", "-"); - /* Concatenation ????*/ - test_match ("[ab][cd]", "ac"); - test_fastmap ("[ab][cd]", "ab", 0, 0); - TEST_REGISTERS ("[ab][cd]", "ad", 0, 2, -1, -1, -1, -1); - TEST_REGISTERS ("[ab][cd]", "xadx", 1, 3, -1, -1, -1, -1); - - /* Valid expression anchoring. */ - test_match ("^a", "a"); - test_fastmap ("^a", "a", 0, 0); - TEST_REGISTERS ("^a", "ax", 0, 1, -1, -1, -1, -1); - - test_match ("^", ""); - TEST_REGISTERS ("^", "", 0, 0, -1, -1, -1, -1); - test_match ("$", ""); - TEST_REGISTERS ("$", "", 0, 0, -1, -1, -1, -1); - - test_match ("a$", "a"); - test_fastmap ("a$", "a", 0, 0); - TEST_REGISTERS ("a$", "xa", 1, 2, -1, -1, -1, -1); - - test_match ("^ab$", "ab"); - test_fastmap ("^ab$", "a", 0, 0); - TEST_REGISTERS ("^a$", "a", 0, 1, -1, -1, -1, -1); - - test_fastmap ("^$", "", 0, 0); - test_match ("^$", ""); - TEST_REGISTERS ("^$", "", 0, 0, -1, -1, -1, -1); - - TEST_SEARCH (PARENS_TO_OPS ("(^a)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ba", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("^(^a)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)$"), "ba", 0, 2); - - /* Two strings. */ - test_match_2 ("ab", "a", "b"); - TEST_REGISTERS_2 ("ab", "a", "b", 0, 2, -1, -1, -1, -1); - - test_match_2 ("a", "", "a"); - test_match_2 ("a", "a", ""); - test_match_2 ("ab", "a", "b"); - /* (start)pos. */ - TEST_POSITIONED_MATCH ("b", "ab", 1); - /* mstop. */ - TEST_TRUNCATED_MATCH ("a", "ab", 1); - - - /* Both basic and extended, continued; should not match. */ - - test_should_match = false; - /* Ordinary characters. 
*/ - test_match ("abc", "ab"); - - TEST_SEARCH ("c", "ab", 0, 2); - TEST_SEARCH ("c", "ab", 0, 2); - TEST_SEARCH ("c", "ab", 1, -2); - TEST_SEARCH ("c", "ab", 0, 10); - TEST_SEARCH ("c", "ab", 1, -10); - TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("c", "a", "b", 1, -2, 2); - TEST_SEARCH_2 ("c", "a", "b", 1, -2, 2); - - TEST_SEARCH ("c", "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 236, -237); - - /* Invalid use of special characters. */ - invalid_pattern (REG_EESCAPE, "\\"); - invalid_pattern (REG_EESCAPE, "a\\"); - invalid_pattern (REG_EESCAPE, "a*\\"); - /* Invalid period. */ - test_match (".", ""); - /* Invalid bracket expressions. */ - test_match ("[ab]", "c"); - test_match ("[^b]", "b"); - test_match ("[^]]", "]"); - - invalid_pattern (REG_EBRACK, "["); - invalid_pattern (REG_EBRACK, "[^"); - invalid_pattern (REG_EBRACK, "[a"); - invalid_pattern (REG_EBRACK, "[]"); - invalid_pattern (REG_EBRACK, "[]a"); - invalid_pattern (REG_EBRACK, "a[]a"); - - - test_match ("[:alpha:]", "q"); /* Character classes. 
*/ - test_match ("[[:alpha:]]", "2"); - test_match ("[[:upper:]]", "a"); - test_match ("[[:lower:]]", "A"); - test_match ("[[:digit:]]", "a"); - test_match ("[[:alnum:]]", ":"); - test_match ("[[:xdigit:]]", "g"); - test_match ("[[:space:]]", "a"); - test_match ("[[:print:]]", "\177"); - test_match ("[[:punct:]]", "a"); - test_match ("[[:graph:]]", " "); - test_match ("[[:cntrl:]]", "a"); - invalid_pattern (REG_EBRACK, "[[:"); - invalid_pattern (REG_EBRACK, "[[:alpha:"); - invalid_pattern (REG_EBRACK, "[[:alpha:]"); - invalid_pattern (REG_ECTYPE, "[[::]]"); - invalid_pattern (REG_ECTYPE, "[[:a:]]"); - invalid_pattern (REG_ECTYPE, "[[:alpo:]]"); - invalid_pattern (REG_ECTYPE, "[[:a:]"); - - test_match ("[a-z]", "2"); /* Invalid ranges. */ - test_match ("[^-a]", "-"); - test_match ("[^a-]", "-"); - test_match ("[)-+--/]", "."); - invalid_pattern (REG_ERANGE, "[z-a]"); /* Empty */ - invalid_pattern (REG_ERANGE, "[a--]"); /* Empty */ - invalid_pattern (REG_ERANGE, "[[:digit:]-9]"); - invalid_pattern (REG_ERANGE, "[a-[:alpha:]]"); - invalid_pattern (REG_ERANGE, "[a-"); - invalid_pattern (REG_EBRACK, "[a-z"); - - test_match ("[ab][cd]", "ae"); /* Concatenation. */ - test_match ("b*c", "b"); /* Star. */ - - /* Invalid anchoring. 
*/ - test_match ("^", "a"); - test_match ("^a", "ba"); - test_match ("$", "b"); - test_match ("a$", "ab"); - test_match ("^$", "a"); - test_match ("^ab$", "a"); - - TEST_SEARCH ("^a", "b\na", 0, 3); - TEST_SEARCH ("b$", "b\na", 0, 3); - - test_match_2 ("^a", "\n", "a"); - test_match_2 ("a$", "a", "\n"); - - TEST_SEARCH (PARENS_TO_OPS ("(^a)"), "ba", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("^(^a)"), "ba", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)$"), "ab", 0, 2); - - printf ("\nFinished generic POSIX tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex-0.12/test/psx-group.c b/regex-0.12/test/psx-group.c @@ -1,440 +0,0 @@ -/* psx-group.c: test POSIX grouping, both basic and extended. */ - -#include "test.h" - - -void -test_grouping () -{ - printf ("\nStarting POSIX grouping tests.\n"); - - test_should_match = true; - - test_fastmap (PARENS_TO_OPS ("(a)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)"), "a", 0, 1, 0, 1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a)"), "xax", 1, 2, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("((a))"), "a"); - test_fastmap (PARENS_TO_OPS ("((a))"), "a", 0, 0); - TEST_REGISTERS (PARENS_TO_OPS ("((a))"), "a", 0, 1, 0, 1, 0, 1); - TEST_REGISTERS (PARENS_TO_OPS ("((a))"), "xax", 1, 2, 1, 2, 1, 2); - - test_fastmap (PARENS_TO_OPS ("(a)(b)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)(b)"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)(b)"), "ab", 0, 2, 0, 1, 1, 2); - - TEST_REGISTERS (PARENS_TO_OPS ("(a)(b)"), "xabx", 1, 3, 1, 2, 2, 3); - - test_all_registers (PARENS_TO_OPS ("((a)(b))"), "ab", "", 0, 2, 0, 2, 0, 1, - 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - - /* Test that we simply ignore groups past the 255th. 
*/ - test_match (PARENS_TO_OPS ("((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((a))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"), "a"); - - - /* Per POSIX D11.1, p. 125. */ - - test_fastmap (PARENS_TO_OPS ("(a)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*"), "", 0, 0, -1, -1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*"), "aa", 0, 2, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)"), "", 0, 0, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)"), "a", 0, 1, 0, 1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)b"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*)b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)b"), "b", 0, 1, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)b"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)b"), "ab", 0, 2, 0, 1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)b)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)b)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "ab", 0, 2, 0, 2, 0, 1); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "abb"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abb", 0, 3, 2, 3, 2, 2); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "aabab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "aabab", 0, 5, 3, 5, 3, 4); 
- - test_match (PARENS_TO_OPS ("((a*)b)*"), "abbab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abbab", 0, 5, 3, 5, 3, 4); - - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "xabbabx", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "abaabaaaab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abaabaaab", 0, 9, 5, 9, 5, 8); - - test_fastmap (PARENS_TO_OPS ("(ab)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(ab)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab)*"), "abab"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "abab", 0, 4, 2, 4, -1, -1); - - /* We match the empty string here. */ - TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "xababx", 0, 0, -1, -1, -1, -1); - - /* Per David A. Willcox. */ - TEST_REGISTERS (PARENS_TO_OPS ("a(b*)c"), "ac", 0, 2, 1, 1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*b"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a)*b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "b", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a)*b"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "ab", 0, 2, 0, 1, -1, -1); - - test_match_2 (PARENS_TO_OPS ("(a)*b"), "a", "ab"); - TEST_REGISTERS_2 (PARENS_TO_OPS ("(a)*b"), "a", "ab", 0, 3, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a)*b"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "aab", 0, 3, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*a"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)*a"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*a"), "a", 0, 1, -1, -1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "", 0, 0, 0, 0, 0, 0); - - test_match (PARENS_TO_OPS ("((a*))*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*))*"), "", 0, 0, 0, 0, 0, 0); - test_match (PARENS_TO_OPS ("((a*))*"), "aa"); - - test_fastmap (PARENS_TO_OPS ("(a*)*b"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*)*b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "b", 0, 1, 0, 0, -1, -1); - - TEST_REGISTERS 
(PARENS_TO_OPS ("(a*)*b"), "xbx", 1, 2, 1, 1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)*b"), "ab"); /* Per rms. */ - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "ab", 0, 2, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "xabx", 1, 3, 1, 2, -1, -1); - - /* Test register restores. */ - test_match (PARENS_TO_OPS ("(a*)*b"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "aab", 0, 3, 0, 2, -1, -1); - - TEST_REGISTERS_2 (PARENS_TO_OPS ("(a*)*b"), "a", "ab", 0, 3, 0, 2, -1, -1); - - /* We are matching the empty string, with backtracking. */ - test_fastmap (PARENS_TO_OPS ("(a*)a"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)a"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)a"), "a", 0, 1, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)a"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)a"), "aa", 0, 2, 0, 1, -1, -1); - - /* We are matching the empty string, with backtracking. */ -/*fails test_match (PARENS_TO_OPS ("(a*)*a"), "a"); */ - test_match (PARENS_TO_OPS ("(a*)*a"), "aa"); - /* Match the empty string. 
*/ - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "a", 0, 1, 0, 0, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "xax", 1, 2, 1, 1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "xaax", 1, 3, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*ab"), "a", 0 , 0); - test_match (PARENS_TO_OPS ("(a)*ab"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*ab"), "ab", 0, 2, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a)*ab"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*ab"), "aab", 0, 3, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS("(a)*ab"), "xaabx", 1, 4, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)ab"), "a", 0 , 0); - test_match (PARENS_TO_OPS ("(a*)ab"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "ab", 0, 2, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)ab"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "aab", 0, 3, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "xaabx", 1, 4, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)*ab"), "a", 0 , 0); - test_match (PARENS_TO_OPS ("(a*)*ab"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*ab"), "ab", 0, 2, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)*ab"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*ab"), "aab", 0, 3, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS("(a*)*ab"), "xaabx", 1, 4, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)*b*c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("(a*)*b*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b*c"), "c", 0, 1, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*(ab)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)*(ab)*"), "ab"); - /* Register 1 doesn't match at all (vs. matching the empty string) - because of backtracking, hence -1's. 
*/ - TEST_REGISTERS (PARENS_TO_OPS ("(a)*(ab)*"), "ab", 0, 2, -1, -1, 0, 2); - - test_match (PARENS_TO_OPS ("(a*)*(ab)*"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*(ab)*"), "ab", 0, 2, 0, 0, 0, 2); - - test_fastmap (PARENS_TO_OPS ("(a*b)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*b)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "b", 0, 1, 0, 1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*"), "baab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "baab", 0, 4, 1, 4, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*b*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "", 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "a", 0, 1, 0, 1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "ba"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "ba", 0, 2, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 2, 0, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "aa", 0, 2, 0, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "bb"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "bb", 0, 2, 0, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "aba"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "aba", 0, 3, 2, 3, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)b"), "b", 0, 1, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)*(b*)*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), ""); - test_all_registers (PARENS_TO_OPS ("((a*)*(b*)*)*"), "", "", 0, 0, 0, 0, - 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), "aba"); - /* Perhaps register 3 should be 3/3 here? 
Not sure if standard - specifies this. xx*/ - test_all_registers (PARENS_TO_OPS ("((a*)*(b*)*)*"), "aba", "", 0, 3, 2, 3, - 2, 3, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)(b*))*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)(b*))*"), ""); - - test_all_registers (PARENS_TO_OPS ("((a*)(b*))*"), "", "", 0, 0, 0, 0, - 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), ""); - - test_match (PARENS_TO_OPS ("((a*)(b*))*"), "aba"); - test_all_registers (PARENS_TO_OPS ("((a*)(b*))*"), "aba", "", 0, 3, 2, 3, - 2, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a)*(b)*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a)*(b)*)*"), ""); - test_all_registers (PARENS_TO_OPS ("((a)*(b)*)*"), "", "", 0, 0, 0, 0, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*(b)*)*"), "aba"); - - test_all_registers (PARENS_TO_OPS ("((a)*(b)*)*"), "aba", "", 0, 3, 2, 3, - 2, 3, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c", 0, 0); - test_match (PARENS_TO_OPS ("(c(a)*(b)*)*"), ""); - test_all_registers (PARENS_TO_OPS ("(c(a)*(b)*)*"), "", "", 0, 0, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c"); - test_all_registers (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c", "", 0, 1, 0, 1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("c((a)*(b)*)*"), "c", 0, 0); - test_match (PARENS_TO_OPS ("c((a)*(b)*)*"), "c"); - test_all_registers (PARENS_TO_OPS ("c((a)*(b)*)*"), "c", "", 0, 1, 1, 1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(((a)*(b)*)*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(((a)*(b)*)*)*"), ""); - test_all_registers (PARENS_TO_OPS 
("(((a)*(b)*)*)*"), "", "", 0, 0, 0, 0, - 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), ""); - test_fastmap (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "c", 0, 0); - - test_all_registers (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "", "", 0, 0, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a)*b)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b)*"), ""); - - test_match (PARENS_TO_OPS ("((a)*b)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*b)*"), "abb"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "abb", 0, 3, 2, 3, 0, 1); /*zz*/ - - test_match (PARENS_TO_OPS ("((a)*b)*"), "abbab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "abbab", 0, 5, 3, 5, 3, 4); - - /* We match the empty string here. */ - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "xabbabx", 0, 0, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "", 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)*"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "aa", 0, 2, 0, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)*)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("((a*)*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)*)*"), "", 0, 0, 0, 0, 0, 0); - - test_match (PARENS_TO_OPS ("((a*)*)*"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)*)*"), "a", 0, 1, 0, 1, 0, 1); - - test_fastmap (PARENS_TO_OPS ("(ab*)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(ab*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab*)*"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*"), "aa", 0, 2, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(ab*)*c"), "ac", 0, 0); - test_match (PARENS_TO_OPS ("(ab*)*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS 
("(ab*)*c"), "c", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab*)*c"), "abbac"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*c"), "abbac", 0, 5, 3, 4, -1, -1); - - test_match (PARENS_TO_OPS ("(ab*)*c"), "abac"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*c"), "abac", 0, 4, 2, 3, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*b)*c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("(a*b)*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "c", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*c"), "bbc"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "bbc", 0, 3, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*c"), "aababc"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "aababc", 0, 6, 3, 5, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*c"), "aabaabc"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "aabaabc", 0, 7, 3, 6, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)b*)"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)b*)"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "", 0, 0, 0, 0, 0, 0); - - test_match (PARENS_TO_OPS ("((a*)b*)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "a", 0, 1, 0, 1, 0, 1); - - test_match (PARENS_TO_OPS ("((a*)b*)"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "b", 0, 1, 0, 1, 0, 0); - - test_fastmap (PARENS_TO_OPS ("((a)*b*)"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b*)"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "", 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*b*)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "a", 0, 1, 0, 1, 0, 1); - - test_match (PARENS_TO_OPS ("((a)*b*)"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "b", 0, 1, 0, 1, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*b*)"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "ab", 0, 2, 0, 2, 0, 1); - - test_fastmap (PARENS_TO_OPS ("((a*)b*)c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("((a*)b*)c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)c"), "c", 0, 1, 0, 0, 0, 0); - 
- test_fastmap (PARENS_TO_OPS ("((a)*b*)c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b*)c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)c"), "c", 0, 1, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*b*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "", 0, 0, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(((a*))((b*)))*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(((a*))((b*)))*"), ""); - test_all_registers (PARENS_TO_OPS ("(((a*))((b*)))*"), "", "", 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), "abcde", 0, 0); - test_match (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), ""); - test_all_registers (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), "", "", 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a)*b)*c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b)*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*c"), "c", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab)*"), ""); - test_match (PARENS_TO_OPS ("((ab)*)"), ""); - test_match (PARENS_TO_OPS ("(((ab)*))"), ""); - test_match (PARENS_TO_OPS ("((((ab)*)))"), ""); - test_match (PARENS_TO_OPS ("(((((ab)*))))"), ""); - test_match (PARENS_TO_OPS ("((((((ab)*)))))"), ""); - test_match (PARENS_TO_OPS ("(((((((ab)*))))))"), ""); - test_match (PARENS_TO_OPS ("((((((((ab)*)))))))"), ""); - test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), ""); - - - test_fastmap (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "a", 0, 0); - test_match (PARENS_TO_OPS ("((((((((((ab)*)))))))))"), ""); - test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), ""); - test_all_registers (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "", NULL, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "abab"); - test_all_registers (PARENS_TO_OPS 
("(((((((((ab)*))))))))"), "abab", NULL, - 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 2, 4); - - - test_should_match = false; - - invalid_pattern (REG_EPAREN, PARENS_TO_OPS ("(a")); - - test_match (PARENS_TO_OPS ("(a)"), ""); - test_match (PARENS_TO_OPS ("((a))"), "b"); - test_match (PARENS_TO_OPS ("(a)(b)"), "ac"); - test_match (PARENS_TO_OPS ("(ab)*"), "acab"); - test_match (PARENS_TO_OPS ("(a*)*b"), "c"); - test_match (PARENS_TO_OPS ("(a*b)*"), "baa"); - test_match (PARENS_TO_OPS ("(a*b)*"), "baabc"); - test_match (PARENS_TO_OPS ("(a*b*)*"), "c"); - test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), "c"); - test_match (PARENS_TO_OPS ("(a*)*"), "ab"); - test_match (PARENS_TO_OPS ("((a*)*)*"), "ab"); - test_match (PARENS_TO_OPS ("((a*)*)*"), "b"); - test_match (PARENS_TO_OPS ("(ab*)*"), "abc"); - test_match (PARENS_TO_OPS ("(ab*)*c"), "abbad"); - test_match (PARENS_TO_OPS ("(a*c)*b"), "aacaacd"); - test_match (PARENS_TO_OPS ("(a*)"), "b"); - test_match (PARENS_TO_OPS ("((a*)b*)"), "c"); - - /* Expression anchoring. */ - TEST_SEARCH (PARENS_TO_OPS ("(^b)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ab", 0, 2); - - printf ("\nFinished POSIX grouping tests.\n"); -} diff --git a/regex-0.12/test/psx-interf.c b/regex-0.12/test/psx-interf.c @@ -1,624 +0,0 @@ -/* psx-interf.c: test POSIX interface. */ - -#include <string.h> -#include <assert.h> - -#include "test.h" - -#define ERROR_CODE_LENGTH 20 -#define TEST_ERRBUF_SIZE 15 - - -void test_compile (); - - -/* ANSWER should be at least ERROR_CODE_LENGTH long. 
*/ - -static char * -get_error_string (error_code, answer) - int error_code; - char answer[]; -{ - switch (error_code) - { - case 0: strcpy (answer, "No error"); break; - case REG_NOMATCH: strcpy (answer, "REG_NOMATCH"); break; - case REG_BADPAT: strcpy (answer, "REG_BADPAT"); break; - case REG_EPAREN: strcpy (answer, "REG_EPAREN"); break; - case REG_ESPACE: strcpy (answer, "REG_ESPACE"); break; - case REG_ECOLLATE: strcpy (answer, "REG_ECOLLATE"); break; - case REG_ECTYPE: strcpy (answer, "REG_ECTYPE"); break; - case REG_EESCAPE: strcpy (answer, "REG_EESCAPE"); break; - case REG_ESUBREG: strcpy (answer, "REG_ESUBREG"); break; - case REG_EBRACK: strcpy (answer, "REG_EBRACK"); break; - case REG_EBRACE: strcpy (answer, "REG_EBRACE"); break; - case REG_BADBR: strcpy (answer, "REG_BADBR"); break; - case REG_ERANGE: strcpy (answer, "REG_ERANGE"); break; - case REG_BADRPT: strcpy (answer, "REG_BADRPT"); break; - case REG_EEND: strcpy (answer, "REG_EEND"); break; - default: strcpy (answer, "Bad error code"); - } - return answer; -} - - -/* I don't think we actually need to initialize all these things. 
- --karl */ - -void -init_pattern_buffer (pattern_buffer_ptr) - regex_t *pattern_buffer_ptr; -{ - pattern_buffer_ptr->buffer = NULL; - pattern_buffer_ptr->allocated = 0; - pattern_buffer_ptr->used = 0; - pattern_buffer_ptr->fastmap = NULL; - pattern_buffer_ptr->fastmap_accurate = 0; - pattern_buffer_ptr->translate = NULL; - pattern_buffer_ptr->can_be_null = 0; - pattern_buffer_ptr->re_nsub = 0; - pattern_buffer_ptr->no_sub = 0; - pattern_buffer_ptr->not_bol = 0; - pattern_buffer_ptr->not_eol = 0; -} - - -void -test_compile (valid_pattern, error_code_expected, pattern, - pattern_buffer_ptr, cflags) - unsigned valid_pattern; - int error_code_expected; - const char *pattern; - regex_t *pattern_buffer_ptr; - int cflags; -{ - int error_code_returned; - boolean error = false; - char errbuf[TEST_ERRBUF_SIZE]; - - init_pattern_buffer (pattern_buffer_ptr); - error_code_returned = regcomp (pattern_buffer_ptr, pattern, cflags); - - if (valid_pattern && error_code_returned) - { - printf ("\nShould have been a valid pattern but wasn't.\n"); - regerror (error_code_returned, pattern_buffer_ptr, errbuf, - TEST_ERRBUF_SIZE); - printf ("%s", errbuf); - error = true; - } - - if (!valid_pattern && !error_code_returned) - { - printf ("\n\nInvalid pattern compiled as valid:\n"); - error = true; - } - - if (error_code_returned != error_code_expected) - { - char expected_error_string[ERROR_CODE_LENGTH]; - char returned_error_string[ERROR_CODE_LENGTH]; - - get_error_string (error_code_expected, expected_error_string), - get_error_string (error_code_returned, returned_error_string); - - printf (" Expected error code %s but got `%s'.\n", - expected_error_string, returned_error_string); - - error = true; - } - - if (error) - print_pattern_info (pattern, pattern_buffer_ptr); -} - - -static void -test_nsub (sub_count, pattern, cflags) - unsigned sub_count; - char *pattern; - int cflags; - -{ - regex_t pattern_buffer; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - - if 
(pattern_buffer.re_nsub != sub_count) - { - printf ("\nShould have counted %d subexpressions but counted %d \ -instead.\n", sub_count, pattern_buffer.re_nsub); - } - - regfree (&pattern_buffer); -} - - -static void -test_regcomp () -{ - regex_t pattern_buffer; - int cflags = 0; - - - printf ("\nStarting regcomp tests.\n"); - - cflags = 0; - test_compile (0, REG_ESUBREG, "\\(a\\)\\2", &pattern_buffer, cflags); - test_compile (0, REG_EBRACE, "a\\{", &pattern_buffer, cflags); - test_compile (0, REG_BADBR, "a\\{-1\\}", &pattern_buffer, cflags); - test_compile (0, REG_EBRACE, "a\\{", &pattern_buffer, cflags); - test_compile (0, REG_EBRACE, "a\\{1", &pattern_buffer, cflags); - - cflags = REG_EXTENDED; - test_compile (0, REG_ECTYPE, "[[:alpo:]]", &pattern_buffer, cflags); - test_compile (0, REG_EESCAPE, "\\", &pattern_buffer, cflags); - test_compile (0, REG_EBRACK, "[a", &pattern_buffer, cflags); - test_compile (0, REG_EPAREN, "(", &pattern_buffer, cflags); - test_compile (0, REG_ERANGE, "[z-a]", &pattern_buffer, cflags); - - test_nsub (1, "(a)", cflags); - test_nsub (2, "((a))", cflags); - test_nsub (2, "(a)(b)", cflags); - - cflags = REG_EXTENDED | REG_NOSUB; - test_nsub (1, "(a)", cflags); - - regfree (&pattern_buffer); - - printf ("\nFinished regcomp tests.\n"); -} - - -static void -fill_pmatch (pmatch, start0, end0, start1, end1, start2, end2) - regmatch_t pmatch[]; - regoff_t start0, end0, start1, end1, start2, end2; -{ - pmatch[0].rm_so = start0; - pmatch[0].rm_eo = end0; - pmatch[1].rm_so = start1; - pmatch[1].rm_eo = end1; - pmatch[2].rm_so = start2; - pmatch[2].rm_eo = end2; -} - - -static void -test_pmatch (pattern, string, nmatch, pmatch, correct_pmatch, cflags) - char *pattern; - char *string; - unsigned nmatch; - regmatch_t pmatch[]; - regmatch_t correct_pmatch[]; - int cflags; -{ - regex_t pattern_buffer; - unsigned this_match; - int error_code_returned; - boolean found_nonmatch = false; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - 
error_code_returned = regexec (&pattern_buffer, string, nmatch, pmatch, 0); - - if (error_code_returned == REG_NOMATCH) - printf ("Matching failed in test_pmatch.\n"); - else - { - for (this_match = 0; this_match < nmatch; this_match++) - { - if (pmatch[this_match].rm_so != correct_pmatch[this_match].rm_so) - { - if (found_nonmatch == false) - printf ("\n"); - - printf ("Pmatch start %d wrong: was %d when should have \ -been %d.\n", this_match, pmatch[this_match].rm_so, - correct_pmatch[this_match].rm_so); - found_nonmatch = true; - } - if (pmatch[this_match].rm_eo != correct_pmatch[this_match].rm_eo) - { - if (found_nonmatch == false) - printf ("\n"); - - printf ("Pmatch end %d wrong: was %d when should have been \ -%d.\n", this_match, pmatch[this_match].rm_eo, - correct_pmatch[this_match].rm_eo); - found_nonmatch = true; - } - } - - if (found_nonmatch) - { - printf (" The number of pmatches requested was: %d.\n", nmatch); - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - } - } /* error_code_returned == REG_NOMATCH */ - - regfree (&pattern_buffer); -} - - -static void -test_eflags (must_match_bol, must_match_eol, pattern, string, cflags, eflags) - boolean must_match_bol; - boolean must_match_eol; - char *pattern; - char *string; - int cflags; - int eflags; -{ - regex_t pattern_buffer; - int error_code_returned; - boolean was_error = false; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, eflags); - - if (error_code_returned == REG_NOMATCH) - { - /* If wasn't true that both 1) the anchored part of the pattern - had to match this string and 2) this string was a proper - substring... */ - - if (!( (must_match_bol && (eflags & REG_NOTBOL)) - || (must_match_eol && (eflags & REG_NOTEOL)) )) - { - printf ("\nEflags test failed: didn't match when should have.\n"); - was_error = true; - } - } - else /* We got a match. 
*/ - { - /* If wasn't true that either 1) the anchored part of the pattern - didn't have to match this string or 2) this string wasn't a - proper substring... */ - - if ((must_match_bol == (eflags & REG_NOTBOL)) - || (must_match_eol == (eflags & REG_NOTEOL))) - { - printf ("\nEflags test failed: matched when shouldn't have.\n"); - was_error = true; - } - } - - if (was_error) - { - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - - if (eflags & REG_NOTBOL) - printf (" The eflag REG_BOL was set.\n"); - if (eflags & REG_NOTEOL) - printf (" The eflag REG_EOL was set.\n"); - } - - regfree (&pattern_buffer); -} - - -static void -test_ignore_case (should_match, pattern, string, cflags) - boolean should_match; - char *pattern; - char *string; - int cflags; -{ - regex_t pattern_buffer; - int error_code_returned; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, 0); - - if (should_match && error_code_returned == REG_NOMATCH) - { - printf ("\nIgnore-case test failed:\n"); - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - - if (cflags & REG_ICASE) - printf (" The cflag REG_ICASE was set.\n"); - } - - regfree (&pattern_buffer); -} - - -static void -test_newline (should_match, pattern, string, cflags) - boolean should_match; - char *pattern; - char *string; - int cflags; -{ - regex_t pattern_buffer; - int error_code_returned; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, 0); - - if (should_match && error_code_returned == REG_NOMATCH) - { - printf ("\nNewline test failed:\n"); - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - - if (cflags & REG_NEWLINE) - printf (" The cflag REG_NEWLINE was set.\n"); - else - printf (" The cflag REG_NEWLINE wasn't set.\n"); - } - - 
regfree (&pattern_buffer); -} - - -static void -test_posix_match (should_match, pattern, string, cflags) - boolean should_match; - char *pattern; - char *string; - int cflags; -{ - regex_t pattern_buffer; - int error_code_returned; - boolean was_error = false; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, 0); - - if (should_match && error_code_returned == REG_NOMATCH) - { - printf ("\nShould have matched but didn't:\n"); - was_error = true; - } - else if (!should_match && error_code_returned != REG_NOMATCH) - { - printf ("\nShould not have matched but did:\n"); - was_error = true; - } - - if (was_error) - { - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - } - - regfree (&pattern_buffer); -} - - -static void -test_regexec () -{ - regmatch_t pmatch[3]; - regmatch_t correct_pmatch[3]; - int cflags = 0; - int eflags = 0; - - printf ("\nStarting regexec tests.\n"); - - cflags = REG_NOSUB; /* shouldn't look at any of pmatch. */ - test_pmatch ("a", "a", 0, pmatch, correct_pmatch, cflags); - - /* Ask for less `pmatch'es than there are pattern subexpressions. - (Shouldn't look at pmatch[2]. */ - cflags = REG_EXTENDED; - fill_pmatch (correct_pmatch, 0, 1, 0, 1, 100, 101); - test_pmatch ("((a))", "a", 2, pmatch, correct_pmatch, cflags); - - /* Ask for same number of `pmatch'es as there are pattern subexpressions. */ - cflags = REG_EXTENDED; - fill_pmatch(correct_pmatch, 0, 1, 0, 1, -1, -1); - test_pmatch ("(a)", "a", 2, pmatch, correct_pmatch, cflags); - - /* Ask for more `pmatch'es than there are pattern subexpressions. 
*/ - cflags = REG_EXTENDED; - fill_pmatch (correct_pmatch, 0, 1, -1, -1, -1, -1); - test_pmatch ("a", "a", 2, pmatch, correct_pmatch, cflags); - - eflags = REG_NOTBOL; - test_eflags (true, false, "^a", "a", cflags, eflags); - test_eflags (true, false, "(^a)", "a", cflags, eflags); - test_eflags (true, false, "a|^b", "b", cflags, eflags); - test_eflags (true, false, "^b|a", "b", cflags, eflags); - - eflags = REG_NOTEOL; - test_eflags (false, true, "a$", "a", cflags, eflags); - test_eflags (false, true, "(a$)", "a", cflags, eflags); - test_eflags (false, true, "a|b$", "b", cflags, eflags); - test_eflags (false, true, "b$|a", "b", cflags, eflags); - - eflags = REG_NOTBOL | REG_NOTEOL; - test_eflags (true, true, "^a$", "a", cflags, eflags); - test_eflags (true, true, "(^a$)", "a", cflags, eflags); - test_eflags (true, true, "a|(^b$)", "b", cflags, eflags); - test_eflags (true, true, "(^b$)|a", "b", cflags, eflags); - - cflags = REG_ICASE; - test_ignore_case (true, "a", "a", cflags); - test_ignore_case (true, "A", "A", cflags); - test_ignore_case (true, "A", "a", cflags); - test_ignore_case (true, "a", "A", cflags); - - test_ignore_case (true, "@", "@", cflags); - test_ignore_case (true, "\\[", "[", cflags); - test_ignore_case (true, "`", "`", cflags); - test_ignore_case (true, "{", "{", cflags); - - test_ignore_case (true, "[!-`]", "A", cflags); - test_ignore_case (true, "[!-`]", "a", cflags); - - cflags = 0; - test_ignore_case (false, "a", "a", cflags); - test_ignore_case (false, "A", "A", cflags); - test_ignore_case (false, "A", "a", cflags); - test_ignore_case (false, "a", "A", cflags); - - test_ignore_case (true, "@", "@", cflags); - test_ignore_case (true, "\\[", "[", cflags); - test_ignore_case (true, "`", "`", cflags); - test_ignore_case (true, "{", "{", cflags); - - test_ignore_case (true, "[!-`]", "A", cflags); - test_ignore_case (false, "[!-`]", "a", cflags); - - - /* Test newline stuff. 
*/ - cflags = REG_EXTENDED | REG_NEWLINE; - test_newline (true, "\n", "\n", cflags); - test_newline (true, "a\n", "a\n", cflags); - test_newline (true, "\nb", "\nb", cflags); - test_newline (true, "a\nb", "a\nb", cflags); - - test_newline (false, ".", "\n", cflags); - test_newline (false, "[^a]", "\n", cflags); - - test_newline (true, "\n^a", "\na", cflags); - test_newline (true, "\n(^a|b)", "\na", cflags); - test_newline (true, "a$\n", "a\n", cflags); - test_newline (true, "(a$|b)\n", "a\n", cflags); - test_newline (true, "(a$|b|c)\n", "a\n", cflags); - test_newline (true, "((a$|b|c)$)\n", "a\n", cflags); - test_newline (true, "((a$|b|c)$)\n", "b\n", cflags); - test_newline (true, "(a$|b)\n|a\n", "a\n", cflags); - - test_newline (true, "^a", "\na", cflags); - test_newline (true, "a$", "a\n", cflags); - - /* Now test normal behavior. */ - cflags = REG_EXTENDED; - test_newline (true, "\n", "\n", cflags); - test_newline (true, "a\n", "a\n", cflags); - test_newline (true, "\nb", "\nb", cflags); - test_newline (true, "a\nb", "a\nb", cflags); - - test_newline (true, ".", "\n", cflags); - test_newline (true, "[^a]", "\n", cflags); - - test_newline (false, "\n^a", "\na", cflags); - test_newline (false, "a$\n", "a\n", cflags); - - test_newline (false, "^a", "\na", cflags); - test_newline (false, "a$", "a\n", cflags); - - - /* Test that matches whole string only. */ - cflags = 0; - test_posix_match (true, "a", "a", cflags); - - /* Tests that match substrings. */ - test_posix_match (true, "a", "ab", cflags); - test_posix_match (true, "b", "ab", cflags); - - /* Test that doesn't match. 
*/ - test_posix_match (false, "a", "b", cflags); - - printf ("\nFinished regexec tests.\n"); -} - - -static void -test_error_code_message (error_code, expected_error_message) - int error_code; - char *expected_error_message; -{ - char returned_error_message[TEST_ERRBUF_SIZE]; - char error_code_string[ERROR_CODE_LENGTH]; - size_t expected_error_message_length = strlen (expected_error_message) + 1; - size_t returned_error_message_length = regerror (error_code, 0, - returned_error_message, - TEST_ERRBUF_SIZE); - - if (returned_error_message_length != expected_error_message_length) - { - printf ("\n\n Testing returned error codes, with expected error \ -message `%s':\n", expected_error_message); - - printf ("\n\n and returned error message `%s':\n", - returned_error_message); - printf (" should have returned a length of %d but returned %d.\n", - expected_error_message_length, returned_error_message_length); - } - - if (strncmp (expected_error_message, returned_error_message, - TEST_ERRBUF_SIZE - 1) != 0) - { - - get_error_string (error_code, error_code_string), - printf ("\n\n With error code %s (%d), expected error message:\n", - error_code_string, error_code); - - printf (" `%s'\n", expected_error_message); - printf (" but got:\n"); - printf (" `%s'\n", returned_error_message); - } -} - - -static void -test_error_code_allocation (error_code, expected_error_message) - int error_code; - char *expected_error_message; -{ - char *returned_error_message = NULL; - char error_code_string[ERROR_CODE_LENGTH]; - size_t returned_error_message_length = regerror (error_code, 0, - returned_error_message, - (size_t)0); - - returned_error_message = xmalloc (returned_error_message_length + 1); - - regerror (error_code, 0, returned_error_message, - returned_error_message_length); - - if (strcmp (expected_error_message, returned_error_message) != 0) - { - get_error_string (error_code, error_code_string), - - printf ("\n\n Testing error code allocation,\n"); - printf ("with error code %s 
(%d), expected error message:\n", - error_code_string, error_code); - printf (" `%s'\n", expected_error_message); - printf (" but got:\n"); - printf (" `%s'\n", returned_error_message); - } -} - - -static void -test_regerror () -{ - test_error_code_message (REG_NOMATCH, "No match"); - test_error_code_message (REG_BADPAT, "Invalid regular expression"); - test_error_code_message (REG_ECOLLATE, "Invalid collation character"); - test_error_code_message (REG_ECTYPE, "Invalid character class name"); - test_error_code_message (REG_EESCAPE, "Trailing backslash"); - test_error_code_message (REG_ESUBREG, "Invalid back reference"); - test_error_code_message (REG_EBRACK, "Unmatched [ or [^"); - test_error_code_message (REG_EPAREN, "Unmatched ( or \\("); - test_error_code_message (REG_EBRACE, "Unmatched \\{"); - test_error_code_message (REG_BADBR, "Invalid content of \\{\\}"); - test_error_code_message (REG_ERANGE, "Invalid range end"); - test_error_code_message (REG_ESPACE, "Memory exhausted"); - test_error_code_message (REG_BADRPT, "Invalid preceding regular expression"); - test_error_code_message (REG_EEND, "Premature end of regular expression"); - test_error_code_message (REG_ESIZE, "Regular expression too big"); - test_error_code_allocation (REG_ERPAREN, "Unmatched ) or \\)"); -} - - -void -test_posix_interface () -{ - printf ("\nStarting POSIX interface tests.\n"); - t = posix_interface_test; - - test_regcomp (); - test_regexec (); - test_regerror (); - - printf ("\nFinished POSIX interface tests.\n"); -} diff --git a/regex-0.12/test/psx-interv.c b/regex-0.12/test/psx-interv.c @@ -1,140 +0,0 @@ -/* psx-interv.c: test POSIX intervals, both basic and extended. */ - -#include "test.h" - -void -test_intervals () -{ - printf ("\nStarting POSIX interval tests.\n"); - - test_should_match = true; - /* Valid intervals. 
*/ - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), "abaab"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), "a", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), - "abaab", 0, 5, 2, 5, -1, -1); - - test_match (BRACES_TO_OPS ("a{0}"), ""); - test_fastmap (BRACES_TO_OPS ("a{0}"), "", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS ("a{0}"), "", 0, 0, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{0}"), "x", 0, 0, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{0,}"), ""); - test_match (BRACES_TO_OPS ("a{0,}"), "a"); - test_fastmap (BRACES_TO_OPS ("a{0,}"), "a", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,}"), "a", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,}"), "xax", 0, 0, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{1}"), "a"); - test_match (BRACES_TO_OPS ("a{1,}"), "a"); - test_match (BRACES_TO_OPS ("a{1,}"), "aa"); - test_match (BRACES_TO_OPS ("a{0,0}"), ""); - test_match (BRACES_TO_OPS ("a{0,1}"), ""); - test_match (BRACES_TO_OPS ("a{0,1}"), "a"); - test_match (BRACES_TO_OPS ("a{1,3}"), "a"); - test_match (BRACES_TO_OPS ("a{1,3}"), "aa"); - test_match (BRACES_TO_OPS ("a{1,3}"), "aaa"); - TEST_REGISTERS (BRACES_TO_OPS ("a{1,3}"), "aaa", 0, 3, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{1,3}"), "xaaax", 1, 4, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{0,3}b"), "b"); - test_match (BRACES_TO_OPS ("a{0,3}b"), "aaab"); - test_fastmap (BRACES_TO_OPS ("a{0,3}b"), "ab", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,3}b"), "b", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,3}b"), "xbx", 1, 2, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{1,3}b"), "ab"); - test_match (BRACES_TO_OPS ("a{1,3}b"), "aaab"); - test_match (BRACES_TO_OPS ("ab{1,3}c"), "abbbc"); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "b"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "ab", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "b", 0, 1, 
-1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "ab", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "xabx", 1, 3, 1, 2, -1, -1); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "ab"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaab"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaab", 0, 4, 2, 3, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "xaaabx", 1, 5, 3, 4, -1, -1); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "aaaab"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "ab", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "aaaab", 0, 5, 4, 4, -1, -1); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "b"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "aaab"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "ab", 0, 0); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,1}ab")), "aaaab"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,1}ab")), "aaaab", 0, 5, 0, 3, -1, -1); - - test_match (BRACES_TO_OPS (".{0,3}b"), "b"); - test_match (BRACES_TO_OPS (".{0,3}b"), "ab"); - - test_match (BRACES_TO_OPS ("[a]{0,3}b"), "b"); - test_match (BRACES_TO_OPS ("[a]{0,3}b"), "aaab"); - test_fastmap (BRACES_TO_OPS ("[a]{0,3}b"), "ab", 0, 0); - test_match (BRACES_TO_OPS ("[^a]{0,3}b"), "bcdb"); - test_match (BRACES_TO_OPS ("ab{0,3}c"), "abbbc"); - test_match (BRACES_TO_OPS ("[[:digit:]]{0,3}d"), "123d"); - test_fastmap (BRACES_TO_OPS ("[[:digit:]]{0,3}d"), "0123456789d", 0, 0); - - test_match (BRACES_TO_OPS ("\\*{0,3}a"), "***a"); - test_match (BRACES_TO_OPS (".{0,3}b"), "aaab"); - test_match (BRACES_TO_OPS ("a{0,3}a"), "aaa"); - /* Backtracking. 
*/ - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a", 0, 0); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a", 0, 1, -1, -1, -1, -1); - - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa", 0, 0); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa", 0, 2, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{2}*"), ""); - test_match (BRACES_TO_OPS ("a{2}*"), "aa"); - - test_match (BRACES_TO_OPS ("a{1}*"), ""); - test_match (BRACES_TO_OPS ("a{1}*"), "a"); - test_match (BRACES_TO_OPS ("a{1}*"), "aa"); - - test_match (BRACES_TO_OPS ("a{1}{1}"), "a"); - - test_match (BRACES_TO_OPS ("a{1}{1}{1}"), "a"); - test_match (BRACES_TO_OPS ("a{1}{1}{2}"), "aa"); - - test_match (BRACES_TO_OPS ("a{1}{1}*"), ""); - test_match (BRACES_TO_OPS ("a{1}{1}*"), "a"); - test_match (BRACES_TO_OPS ("a{1}{1}*"), "aa"); - test_match (BRACES_TO_OPS ("a{1}{1}*"), "aaa"); - - test_match (BRACES_TO_OPS ("a{1}{2}"), "aa"); - test_match (BRACES_TO_OPS ("a{2}{1}"), "aa"); - - - test_should_match = false; - - test_match (BRACES_TO_OPS ("a{0}"), "a"); - test_match (BRACES_TO_OPS ("a{0,}"), "b"); - test_match (BRACES_TO_OPS ("a{1}"), ""); - test_match (BRACES_TO_OPS ("a{1}"), "aa"); - test_match (BRACES_TO_OPS ("a{1,}"), ""); - test_match (BRACES_TO_OPS ("a{1,}"), "b"); - test_match (BRACES_TO_OPS ("a{0,0}"), "a"); - test_match (BRACES_TO_OPS ("a{0,1}"), "aa"); - test_match (BRACES_TO_OPS ("a{0,1}"), "b"); - test_match (BRACES_TO_OPS ("a{1,3}"), ""); - test_match (BRACES_TO_OPS ("a{1,3}"), "aaaa"); - test_match (BRACES_TO_OPS ("a{1,3}"), "b"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaaab"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "bb"); - test_match (BRACES_TO_OPS ("[a]{0,3}"), "aaaa"); - test_match (BRACES_TO_OPS ("[^a]{0,3}b"), "ab"); - test_match (BRACES_TO_OPS 
("ab{0,3}c"), "abababc"); - test_match (BRACES_TO_OPS ("[:alpha:]{0,3}d"), "123d"); - test_match (BRACES_TO_OPS ("\\^{1,3}a"), "a"); - test_match (BRACES_TO_OPS (".{0,3}b"), "aaaab"); - - printf ("\nFinished POSIX interval tests.\n"); -} diff --git a/regex-0.12/test/regexcpp.sed b/regex-0.12/test/regexcpp.sed @@ -1,8 +0,0 @@ -/;..*$/s/;/;\ -/g -/{ .*$/s/{/{\ -/g -/ \?[^'] /s/?/?\ -/g -/ : /s/:/:\ -/g diff --git a/regex-0.12/test/syntax.skel b/regex-0.12/test/syntax.skel @@ -1,74 +0,0 @@ -/* Print which syntax bits are set. */ - -#include <sys/types.h> -#include <stdio.h> -#include "regex.h" - -/* It's coincidental that these two are currently the same. */ -#define LONGEST_BIT_NAME "RE_UNMATCHED_RIGHT_PAREN_ORD" -#define LAST_BIT RE_UNMATCHED_RIGHT_PAREN_ORD - -/* Sum of above, when printed. Assigned in main. */ -static unsigned longest; - - -static void -test_bit (syntax, bit, name) - reg_syntax_t syntax; - unsigned bit; - char *name; -{ - char padding[100], test_str[100]; - int padding_count; - - sprintf (test_str, "%s (%d=0x%x)", name, bit, bit); - padding_count = longest - strlen (test_str); - - padding[padding_count] = 0; - while (padding_count--) - { - padding[padding_count] = ' '; - } - - printf ("%s%s (%d=0x%x): %c\n", - name, padding, bit, bit, syntax & bit ? 'y' : 'n'); -} - - -/* Macro to abbreviate the constant arguments. */ -#define TEST_BIT(bit) test_bit (syntax, bit, #bit) - -int -main (argc, argv) - int argc; - char *argv[]; -{ - reg_syntax_t syntax; - char syntax_str[1000], test_str[100]; - - switch (argc) - { - case 1: - printf ("Syntax? "); - scanf ("%s", syntax_str); - break; - - case 2: - strcpy (syntax_str, argv[1]); - break; - - default: - fprintf (stderr, "Usage: syntax [syntax].\n"); - exit (1); - } - - sscanf (syntax_str, "%i", &syntax); - - /* Figure out the longest name, so we can align the output nicely. 
*/ - sprintf (test_str, "%s (%d=0x%x)", LONGEST_BIT_NAME, LAST_BIT, LAST_BIT); - longest = strlen (test_str); - - /* [[[replace with bit tests]]] */ - - return 0; -} diff --git a/regex-0.12/test/test.c b/regex-0.12/test/test.c @@ -1,782 +0,0 @@ -/* test.c: testing routines for regex.c. */ - -#include <assert.h> - -#ifdef STDC_HEADERS -#include <stdlib.h> -#else -char *malloc (); -char *realloc (); -#endif - -/* Just to be complete, we make both the system V/ANSI and the BSD - versions of the string functions available. */ -#if USG || STDC_HEADERS -#include <string.h> -#define index strchr -#define rindex strrchr -#define bcmp(s1, s2, len) memcmp ((s1), (s2), (len)) -#define bcopy(from, to, len) memcpy ((to), (from), (len)) -#define bzero(s, len) memset ((s), 0, (len)) -#else -#include <strings.h> -#define strchr index -#define strrchr rindex -#ifndef NEED_MEMORY_H -#define memcmp(s1, s2, n) bcmp ((s1), (s2), (n)) -#define memcpy(to, from, len) bcopy ((from), (to), (len)) -#endif -extern char *strtok (); -extern char *strstr (); -#endif /* not USG or STDC_HEADERS */ - -/* SunOS 4.1 declares memchr in <memory.h>, not <string.h>. I don't - understand why. */ -#if NEED_MEMORY_H -#include <memory.h> -#endif - -#include "test.h" - -#define BYTEWIDTH 8 - -extern void print_partial_compiled_pattern (); -extern void print_compiled_pattern (); -extern void print_double_string (); - -/* If nonzero, the results of every test are displayed. */ -boolean verbose = false; - -/* If nonzero, don't do register testing. */ -boolean omit_register_tests = true; - -/* Says whether the current test should match or fail to match. 
*/ -boolean test_should_match; - - -static void -set_all_registers (start0, end0, start1, end1, - start2, end2, start3, end3, - start4, end4, start5, end5, - start6, end6, start7, end7, - start8, end8, start9, end9, regs) - - int start0; int end0; int start1; int end1; - int start2; int end2; int start3; int end3; - int start4; int end4; int start5; int end5; - int start6; int end6; int start7; int end7; - int start8; int end8; int start9; int end9; - struct re_registers *regs; - - { - unsigned r; - - regs->start[0] = start0; regs->end[0] = end0; - regs->start[1] = start1; regs->end[1] = end1; - regs->start[2] = start2; regs->end[2] = end2; - regs->start[3] = start3; regs->end[3] = end3; - regs->start[4] = start4; regs->end[4] = end4; - regs->start[5] = start5; regs->end[5] = end5; - regs->start[6] = start6; regs->end[6] = end6; - regs->start[7] = start7; regs->end[7] = end7; - regs->start[8] = start8; regs->end[8] = end8; - regs->start[9] = start9; regs->end[9] = end9; - for (r = 10; r < regs->num_regs; r++) - { - regs->start[r] = -1; - regs->end[r] = -1; - } - } - - - -/* Return the concatenation of S1 and S2. This would be a prime place - to use varargs. */ - -char * -concat (s1, s2) - char *s1; - char *s2; -{ - char *answer = xmalloc (strlen (s1) + strlen (s2) + 1); - - strcpy (answer, s1); - strcat (answer, s2); - - return answer; -} - - -#define OK_TO_SEARCH (nonconst_buf.fastmap_accurate && (str1 || str2)) - -/* We ignore the `can_be_null' argument. Should just be removed. 
*/ - -void -general_test (pattern_should_be_valid, match_whole_string, - pat, str1, str2, start, range, end, correct_fastmap, - correct_regs, can_be_null) - unsigned pattern_should_be_valid; - unsigned match_whole_string; - const char *pat; - char *str1, *str2; - int start, range, end; - char *correct_fastmap; - struct re_registers *correct_regs; - int can_be_null; -{ - struct re_pattern_buffer nonconst_buf; - struct re_pattern_buffer old_buf; - struct re_registers regs; - const char *r; - char fastmap[1 << BYTEWIDTH]; - unsigned *regs_correct = NULL; - unsigned all_regs_correct = 1; - boolean fastmap_internal_error = false; - unsigned match = 0; - unsigned match_1 = 0; - unsigned match_2 = 0; - unsigned invalid_pattern = 0; - boolean internal_error_1 = false; - boolean internal_error_2 = false; - - - nonconst_buf.allocated = 8; - nonconst_buf.buffer = xmalloc (nonconst_buf.allocated); - nonconst_buf.fastmap = fastmap; - nonconst_buf.translate = 0; - - assert (pat != NULL); - r = re_compile_pattern (pat, strlen (pat), &nonconst_buf); - - /* Kludge: if we are doing POSIX testing, we really should have - called regcomp, not re_compile_pattern. As it happens, the only - way in which it matters is that re_compile_pattern sets the - newline/anchor field for matching (part of what happens when - REG_NEWLINE is given to regcomp). We have to undo that for POSIX - matching. 
*/ - if (t == posix_basic_test || t == posix_extended_test) - nonconst_buf.newline_anchor = 0; - - invalid_pattern = r != NULL; - - if (!r) - { - int r; - - if (!pattern_should_be_valid) - printf ("\nShould have been an invalid pattern but wasn't:\n"); - else - { - fastmap_internal_error = (re_compile_fastmap (&nonconst_buf) == -2); - - if (correct_fastmap) - nonconst_buf.fastmap_accurate = - memcmp (nonconst_buf.fastmap, correct_fastmap, 1 << BYTEWIDTH) - == 0; - - if (OK_TO_SEARCH) - { - old_buf = nonconst_buf; - old_buf.buffer = (unsigned char *) xmalloc (nonconst_buf.used); - memcpy (old_buf.buffer, nonconst_buf.buffer, nonconst_buf.used); - - /* If only one string is null, call re_match or re_search, - which is what the user would probably do. */ - if (str1 == NULL && str2 != NULL - || str2 == NULL && str1 != NULL) - { - char *the_str = str1 == NULL ? str2 : str1; - - match_1 - = match_whole_string - ? (r = re_match (&nonconst_buf, the_str, - strlen (the_str), start, &regs)) - == strlen (the_str) - : (r = re_search (&nonconst_buf, - the_str, strlen (the_str), - start, range, &regs)) - >= 0; - - if (r == -2) - internal_error_1 = true; - } - else - match_1 = 1; - - /* Also call with re_match_2 or re_search_2, as they might - do this. (Also can check calling with either string1 - or string2 or both null.) 
*/ - if (match_whole_string) - { - r = re_match_2 (&nonconst_buf, - str1, SAFE_STRLEN (str1), - str2, SAFE_STRLEN (str2), - start, &regs, end); - match_2 = r == SAFE_STRLEN (str1) + SAFE_STRLEN (str2); - } - else - { - r = re_search_2 (&nonconst_buf, - str1, SAFE_STRLEN (str1), - str2, SAFE_STRLEN (str2), - start, range, &regs, end); - match_2 = r >= 0; - } - - if (r == -2) - internal_error_2 = true; - - match = match_1 & match_2; - - if (correct_regs) - { - unsigned reg; - if (regs_correct != NULL) - free (regs_correct); - - regs_correct - = (unsigned *) xmalloc (regs.num_regs * sizeof (unsigned)); - - for (reg = 0; - reg < regs.num_regs && reg < correct_regs->num_regs; - reg++) - { - regs_correct[reg] - = (regs.start[reg] == correct_regs->start[reg] - && regs.end[reg] == correct_regs->end[reg]) -#ifdef EMPTY_REGS_CONFUSED - /* There is confusion in the standard about - the registers in some patterns which can - match either the empty string or not match. - For example, in `((a*))*' against the empty - string, the two registers can either match - the empty string (be 0/0), or not match - (because of the outer *) (be -1/-1). (Or - one can do one and one can do the other.) 
*/ - || (regs.start[reg] == -1 && regs.end[reg] == -1 - && correct_regs->start[reg] - == correct_regs->end[reg]) -#endif - ; - - all_regs_correct &= regs_correct[reg]; - } - } - } /* OK_TO_SEARCH */ - } - } - - if (fastmap_internal_error) - printf ("\n\nInternal error in re_compile_fastmap:"); - - if (internal_error_1) - { - if (!fastmap_internal_error) - printf ("\n"); - - printf ("\nInternal error in re_match or re_search:"); - } - - if (internal_error_2) - { - if (!internal_error_1) - printf ("\n"); - - printf ("\nInternal error in re_match_2 or re_search_2:"); - } - - if ((OK_TO_SEARCH && ((match && !test_should_match) - || (!match && test_should_match)) - || (correct_regs && !all_regs_correct)) - || !nonconst_buf.fastmap_accurate - || invalid_pattern - || !pattern_should_be_valid - || internal_error_1 || internal_error_2 - || verbose) - { - if (OK_TO_SEARCH && match && !test_should_match) - { - printf ("\n\nMatched but shouldn't have:\n"); - if (match_1) - printf ("The single match/search succeeded.\n"); - - if (match_2) - printf ("The double match/search succeeded.\n"); - } - else if (OK_TO_SEARCH && !match && test_should_match) - { - printf ("\n\nDidn't match but should have:\n"); - if (!match_1) - printf ("The single match/search failed.\n"); - - if (!match_2) - printf ("The double match/search failed.\n"); - } - else if (invalid_pattern && pattern_should_be_valid) - printf ("\n\nInvalid pattern (%s):\n", r); - else if (!nonconst_buf.fastmap_accurate && pattern_should_be_valid) - printf ("\n\nIncorrect fastmap:\n"); - else if (OK_TO_SEARCH && correct_regs && !all_regs_correct) - printf ("\n\nNot all registers were correct:\n"); - else if (verbose) - printf ("\n\nTest was OK:\n"); - - - if ((!(invalid_pattern && !pattern_should_be_valid)) || verbose) - printf (" Pattern: `%s'.\n", pat); - - if (pattern_should_be_valid || verbose - || internal_error_1 || internal_error_2) - { - printf(" Strings: "); - printf ("`%s' and ", str1 == NULL ? 
"NULL" : str1); - printf ("`%s'.\n", str2 == NULL ? "NULL" : str2); - - if ((OK_TO_SEARCH || verbose || internal_error_1 || internal_error_2) - && !invalid_pattern) - { - if (memcmp (old_buf.buffer, nonconst_buf.buffer, - nonconst_buf.used) != 0 - && !invalid_pattern) - { - printf(" (%s)\n", r ? r : "Valid regular expression"); - printf ("\n Compiled pattern before matching: "); - print_compiled_pattern (&old_buf); - printf ("\n Compiled pattern after matching: "); - } - else - printf ("\n Compiled pattern: "); - - print_compiled_pattern (&nonconst_buf); - } - - if (correct_fastmap && (!nonconst_buf.fastmap_accurate || verbose)) - { - printf ("\n The fastmap should have been: "); - print_fastmap (correct_fastmap); - - printf ("\n Fastmap: "); - print_fastmap (fastmap); - - printf ("\n Compiled pattern before matching: "); - print_compiled_pattern (&nonconst_buf); - } - - if ((!all_regs_correct || verbose) && correct_regs) - { - unsigned this_reg; - printf ("\n Incorrect registers:"); - - for (this_reg = 0; this_reg < regs.num_regs; this_reg++) - { - if (!regs_correct[this_reg]) - { - printf ("\n Register %d's start was %2d. ", this_reg, - regs.start[this_reg]); - printf ("\tIt should have been %d.\n", - correct_regs->start[this_reg]); - printf (" Register %d's end was %2d. 
", this_reg, - regs.end[this_reg]); - printf ("\tIt should have been %d.\n", - correct_regs->end[this_reg]); - } - } - } - } - } - - if (nonconst_buf.buffer != NULL) - free (nonconst_buf.buffer); - - if (OK_TO_SEARCH) - { - free (old_buf.buffer); - - if (correct_regs) - free (regs_correct); - - } - - nonconst_buf.buffer = old_buf.buffer = NULL; - regs_correct = NULL; - regs.start = regs.end = NULL; - -} /* general_test */ - - -void -test_search_return (match_start_wanted, pattern, string) - int match_start_wanted; - const char *pattern; - char *string; -{ - struct re_pattern_buffer buf; - char fastmap[1 << BYTEWIDTH]; - const char *compile_return; - int match_start; - static num_times_called = 0; - - num_times_called++; - buf.allocated = 1; - buf.buffer = xmalloc (buf.allocated); - - assert (pattern != NULL); - buf.translate = 0; - compile_return = re_compile_pattern (pattern, strlen (pattern), &buf); - - if (compile_return) - { - printf ("\n\nInvalid pattern in test_match_start:\n"); - printf ("%s\n", compile_return); - } - else - { - buf.fastmap = fastmap; - match_start = re_search (&buf, string, strlen (string), - 0, strlen (string), 0); - - if (match_start != match_start_wanted) - printf ("\nWanted search to start at %d but started at %d.\n", - match_start, match_start_wanted); - } - free (buf.buffer); - buf.buffer = NULL; -} - - -#define SET_FASTMAP() \ - { \ - unsigned this_char; \ - \ - memset (correct_fastmap, invert, (1 << BYTEWIDTH)); \ - \ - for (this_char = 0; this_char < strlen (fastmap_string); this_char++)\ - correct_fastmap[fastmap_string[this_char]] = !invert; \ - correct_fastmap['\n'] = match_newline; \ - } - - -void -test_fastmap (pat, fastmap_string, invert, match_newline) - const char *pat; - char *fastmap_string; - unsigned invert; - unsigned match_newline; -{ - char correct_fastmap[(1 << BYTEWIDTH)]; - - SET_FASTMAP (); - general_test (1, 0, pat, NULL, NULL, -1, 0, -1, correct_fastmap, 0, -1); -} - - -void -test_fastmap_search (pat, str, 
fastmap_string, invert, match_newline, - can_be_null, start0, end0) - const char *pat; - char *str; - char *fastmap_string; - unsigned invert; - unsigned match_newline; - int can_be_null; - int start0; - int end0; -{ - char correct_fastmap[(1 << BYTEWIDTH)]; - struct re_registers correct_regs; - - correct_regs.num_regs = RE_NREGS; - correct_regs.start = (int *) xmalloc (RE_NREGS * sizeof (int)); - correct_regs.end = (int *) xmalloc (RE_NREGS * sizeof (int)); - - set_all_registers (start0, end0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, &correct_regs); - SET_FASTMAP (); - general_test (1, 0, pat, str, NULL, 0, SAFE_STRLEN (str), SAFE_STRLEN (str), - correct_fastmap, &correct_regs, can_be_null); - - free (correct_regs.start); - free (correct_regs.end); -} - - - - -void -test_all_registers (pat, str1, str2, - start0, end0, start1, end1, - start2, end2, start3, end3, - start4, end4, start5, end5, - start6, end6, start7, end7, - start8, end8, start9, end9) - char *pat; char *str1; char *str2; - int start0; int end0; int start1; int end1; - int start2; int end2; int start3; int end3; - int start4; int end4; int start5; int end5; - int start6; int end6; int start7; int end7; - int start8; int end8; int start9; int end9; -{ - struct re_registers correct_regs; - - if (omit_register_tests) return; - - correct_regs.num_regs = RE_NREGS; - correct_regs.start = (int *) xmalloc (RE_NREGS * sizeof (int)); - correct_regs.end = (int *) xmalloc (RE_NREGS * sizeof (int)); - - set_all_registers (start0, end0, start1, end1, start2, end2, start3, end3, - start4, end4, start5, end5, start6, end6, start7, end7, - start8, end8, start9, end9, &correct_regs); - - general_test (1, 0, pat, str1, str2, 0, - SAFE_STRLEN (str1) + SAFE_STRLEN (str2), - SAFE_STRLEN (str1) + SAFE_STRLEN (str2), - NULL, &correct_regs, -1); - - free (correct_regs.start); - free (correct_regs.end); -} - - -void -invalid_pattern (error_code_expected, pattern) - int error_code_expected; - 
char *pattern; -{ - regex_t pattern_buffer; - int cflags - = re_syntax_options == RE_SYNTAX_POSIX_EXTENDED - || re_syntax_options == RE_SYNTAX_POSIX_MINIMAL_EXTENDED - ? REG_EXTENDED : 0; - - test_compile (0, error_code_expected, pattern, &pattern_buffer, cflags); -} - - -void -valid_pattern (pattern) - char *pattern; -{ - regex_t pattern_buffer; - int cflags - = re_syntax_options == RE_SYNTAX_POSIX_EXTENDED - || re_syntax_options == RE_SYNTAX_POSIX_MINIMAL_EXTENDED - ? REG_EXTENDED : 0; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); -} - - -char * -delimiters_to_ops (source, left_delimiter, right_delimiter) - char *source; - char left_delimiter; - char right_delimiter; -{ - static char *answer = NULL; - char *tmp = NULL; - boolean double_size = false; - unsigned source_char; - unsigned answer_char = 0; - - assert (source != NULL); - - switch (left_delimiter) - { - case '(': if (!(re_syntax_options & RE_NO_BK_PARENS)) - double_size = true; - break; - case '{': if (!(re_syntax_options & RE_NO_BK_BRACES)) - double_size = true; - break; - default: printf ("Found strange delimiter %c in delimiter_to_ops.\n", - left_delimiter); - printf ("The source was `%s'\n", source); - exit (0); - } - - if (answer == source) - { - tmp = (char *) xmalloc (strlen (source) + 1); - strcpy (tmp, source); - source = tmp; - } - - if (answer) - { - free (answer); - answer = NULL; - } - - answer = (char *) xmalloc ((double_size - ? 
strlen (source) << 1 - : strlen (source)) - + 1); - if (!double_size) - strcpy (answer, source); - else - { - for (source_char = 0; source_char < strlen (source); source_char++) - { - if (source[source_char] == left_delimiter - || source[source_char] == right_delimiter) - answer[answer_char++] = '\\'; - - answer[answer_char++] = source[source_char]; - } - answer[answer_char] = 0; - } - - return answer; -} - - -void -print_pattern_info (pattern, pattern_buffer_ptr) - const char *pattern; - regex_t *pattern_buffer_ptr; -{ - printf (" Pattern: `%s'.\n", pattern); - printf (" Compiled pattern: "); - print_compiled_pattern (pattern_buffer_ptr); -} - - -void -valid_nonposix_pattern (pattern) - char *pattern; -{ - struct re_pattern_buffer nonconst_buf; - - nonconst_buf.allocated = 0; - nonconst_buf.buffer = NULL; - nonconst_buf.translate = NULL; - - assert (pattern != NULL); - - if (re_compile_pattern (pattern, strlen (pattern), &nonconst_buf)) - { - printf ("Couldn't compile the pattern.\n"); - print_pattern_info (pattern, &nonconst_buf); - } -} - - -void -compile_and_print_pattern (pattern) - char *pattern; -{ - struct re_pattern_buffer nonconst_buf; - - nonconst_buf.allocated = 0; - nonconst_buf.buffer = NULL; - - if (re_compile_pattern (pattern, strlen (pattern), &nonconst_buf)) - printf ("Couldn't compile the pattern.\n"); - - print_pattern_info (pattern, &nonconst_buf); -} - - -void -test_case_fold (pattern, string) - const char *pattern; - char* string; -{ - struct re_pattern_buffer nonconst_buf; - const char *ret; - - init_pattern_buffer (&nonconst_buf); - nonconst_buf.translate = upcase; - - assert (pattern != NULL); - ret = re_compile_pattern (pattern, strlen (pattern), &nonconst_buf); - - if (ret) - { - printf ("\nShould have been a valid pattern but wasn't.\n"); - print_pattern_info (pattern, &nonconst_buf); - } - else - { - if (test_should_match - && re_match (&nonconst_buf, string, strlen (string), 0, 0) - != strlen (string)) - { - printf ("Match failed for 
case fold.\n"); - printf (" Pattern: `%s'.\n", pattern); - printf (" String: `%s'.\n", string == NULL ? "NULL" : string); - } - } -} - - -void -test_match_n_times (n, pattern, string) - unsigned n; - char* pattern; - char* string; -{ - struct re_pattern_buffer buf; - const char *r; - unsigned match = 0; - unsigned this_match; - - buf.allocated = 0; - buf.buffer = NULL; - buf.translate = 0; - - assert (pattern != NULL); - - r = re_compile_pattern (pattern, strlen (pattern), &buf); - if (r) - { - printf ("Didn't compile.\n"); - printf (" Pattern: %s.\n", pattern); - } - else - { - for (this_match = 1; this_match <= n; this_match++) - match = (re_match (&buf, string, strlen (string), - 0, 0) - == strlen (string)); - - if (match && !test_should_match) - printf ("\n\nMatched but shouldn't have:\n"); - else if (!match && test_should_match) - printf ("\n\nDidn't match but should have:\n"); - - if ((match && !test_should_match) || (!match && test_should_match)) - { - printf(" The string to match was: "); - if (string) - printf ("`%s' and ", string); - else - printf ("`'"); - - printf (" Pattern: %s.\n", pattern); - printf (" Compiled pattern: %s.\n", pattern); - print_compiled_pattern (&buf); - } - } -} - - -void -test_match_2 (pat, str1, str2) - const char *pat; - char *str1; - char *str2; -{ - general_test (1, 1, pat, str1, str2, 0, 1, - SAFE_STRLEN (str1) + SAFE_STRLEN (str2), NULL, 0, -1); -} - -void -test_match (pat, str) - const char *pat; - char *str; -{ - test_match_2 (pat, str, NULL); - test_match_2 (pat, NULL, str); -} diff --git a/regex-0.12/test/test.h b/regex-0.12/test/test.h @@ -1,141 +0,0 @@ -/* test.h: for Regex testing. */ - -#ifndef TEST_H -#define TEST_H - -#include <stdio.h> -#include <assert.h> - -#include <sys/types.h> -#include "regex.h" - - -/* A strlen that works even on a null pointer. */ -#define SAFE_STRLEN(s) (s == NULL ? 
0 : strlen (s)) - -typedef enum { false = 0, true = 1 } boolean; - -extern boolean test_should_match; -extern boolean omit_register_tests; -extern void *xmalloc (); - -/* Defined in upcase.c. */ -extern char upcase[]; - -typedef enum -{ - all_test, - other_test, - posix_basic_test, - posix_extended_test, - posix_interface_test, - regress_test -} test_type; - -extern test_type t; - - -#if __STDC__ - -extern char *concat (char *, char *); - -extern void general_test (unsigned pattern_should_be_valid, - unsigned match_whole_string, - const char *pat, char *str1, char *str2, - int start, int range, int end, - char *correct_fastmap, - struct re_registers *correct_regs, int can_be_null); - - -extern void init_pattern_buffer (regex_t *pattern_buffer_ptr); - -extern void test_compile (unsigned valid_pattern, int error_code_expected, - const char *pattern, regex_t *pattern_buffer_ptr, - int cflags); - -extern char *delimiter_to_ops (char *source, char left_delimiter, - char right_delimiter); - - -extern void test_search_return (int, const char *, char *); - -extern void test_berk_search (const char *pattern, char *string); - -extern void test_fastmap (const char *pat, char *fastmap_string, unsigned invert, - unsigned match_newline); - -extern void test_fastmap_search (const char *pat, char *str, char *fastmap_string, - unsigned invert, unsigned match_newline, - int can_be_null, int start0, int end0); - -extern void test_all_registers (char *pat, char *str1, char *str2, - int start0, int end0, int start1, int end1, - int start2, int end2, int start3, int end3, - int start4, int end4, int start5, int end5, - int start6, int end6, int start7, int end7, - int start8, int end8, int start9, int end9); - -extern void print_pattern_info (const char *pattern, regex_t *pattern_buffer_ptr); -extern void compile_and_print_pattern (char *pattern); - -extern void test_case_fold (const char *pattern, char* string); - -extern void test_posix_generic (); - -extern void test_grouping (); - 
-extern void invalid_pattern (int error_code_expected, char *pattern); -extern void valid_nonposix_pattern (char *pattern); -extern void valid_pattern (char *pattern); - -extern void test_match_2 (const char *pat, char *str1, char *str2); -extern void test_match (const char *pat, char *str); - -#endif /* __STDC__ */ - - -#define TEST_REGISTERS_2(pat, str1, str2, start0, end0, start1, end1, start2, end2)\ - if (!omit_register_tests) \ - test_all_registers (pat, str1, str2, start0, end0, start1, end1, \ - start2, end2, -1, -1, -1, -1, -1, -1, -1, -1,\ - -1, -1, -1, -1, -1, -1) \ - - -#define TEST_REGISTERS(pat, str, start0, end0, start1, end1, start2, end2) \ - TEST_REGISTERS_2 (pat, str, NULL, start0, end0, start1, end1, start2, end2)\ - -#define BRACES_TO_OPS(string) ((char *) delimiters_to_ops (string, '{', '}')) -#define PARENS_TO_OPS(string) ((char *) delimiters_to_ops (string, '(', ')')) - -#define INVALID_PATTERN(pat) \ - general_test (0, 0, pat, NULL, NULL, -1, 0, -1, NULL, 0, -1) - - -#define MATCH_SELF(p) test_match (p, p) - -#define TEST_POSITIONED_MATCH(pat, str, start) \ - general_test (1, 0, pat, str, NULL, start, 1, SAFE_STRLEN (str), \ - NULL, 0, -1) - -#define TEST_TRUNCATED_MATCH(pat, str, end) \ - general_test (1, 0, pat, str, NULL, 0, 1, end, NULL, 0, -1) - -#define TEST_SEARCH_2(pat, str1, str2, start, range, one_past_end) \ - general_test (1, 0, pat, str1, str2, start, range, one_past_end, \ - NULL, 0, -1) - -#define TEST_SEARCH(pat, str, start, range) \ - { \ - TEST_SEARCH_2 (pat, str, NULL, start, range, SAFE_STRLEN (str)); \ - TEST_SEARCH_2 (pat, NULL, str, start, range, SAFE_STRLEN (str)); \ - } - -#endif /* TEST_H */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex-0.12/test/tregress.c b/regex-0.12/test/tregress.c @@ -1,464 +0,0 @@ -/* tregress.c: reported bugs. 
The `t' just makes the filename not have - a common prefix with `regex.c', so completion works better. */ - -#include "test.h" - - -boolean pause_at_error = true; - -char * -itoa (i) - int i; -{ - char *a = xmalloc (21); /* sign + 19 digits (enough for 64 bits) + null */ - - sprintf (a, "%d", i); - return a; -} - - -static void -simple_fail (routine, pat, buf, str, ret) - const char *routine; - const char *pat; - struct re_pattern_buffer *buf; - const char *str; - char *ret; -{ - fprintf (stderr, "Failed %s (return = %s).\n", routine, ret); - if (str && *str) fprintf (stderr, " String = %s\n", str); - fprintf (stderr, " Pattern = %s\n", pat); - print_compiled_pattern (buf); - - if (pause_at_error) - { - fprintf (stderr, "RET to continue: "); - (void) getchar (); - } -} - - -/* Abbreviate the most common calls. */ - -static void -simple_compile (pat, buf) - const char *pat; - struct re_pattern_buffer *buf; -{ - const char *ret = re_compile_pattern (pat, strlen (pat), buf); - - if (ret != NULL) simple_fail ("compile", pat, buf, NULL, ret); -} - - -static void -simple_fastmap (pat) - const char *pat; -{ - struct re_pattern_buffer buf; - char fastmap[256]; - int ret; - - buf.allocated = 0; - buf.buffer = buf.translate = NULL; - buf.fastmap = fastmap; - - simple_compile (pat, &buf); - - ret = re_compile_fastmap (&buf); - - if (ret != 0) simple_fail ("fastmap compile", pat, &buf, NULL, itoa (ret)); -} - - -#define SIMPLE_MATCH(pat, str) do_match (pat, str, strlen (str)) -#define SIMPLE_NONMATCH(pat, str) do_match (pat, str, -1) - -static void -do_match (pat, str, expected) - const char *pat, *str; - int expected; -{ - int ret; - unsigned len; - struct re_pattern_buffer buf; - - buf.allocated = 0; - buf.buffer = buf.translate = buf.fastmap = NULL; - - simple_compile (pat, &buf); - - len = strlen (str); - - ret = re_match_2 (&buf, NULL, 0, str, len, 0, NULL, len); - - if (ret != expected) simple_fail ("match", pat, &buf, str, itoa (ret)); -} - - -static void -simple_search 
(pat, str, correct_startpos) - const char *pat, *str; - int correct_startpos; -{ - int ret; - unsigned len; - struct re_pattern_buffer buf; - - buf.allocated = 0; - buf.buffer = buf.translate = buf.fastmap = NULL; - - simple_compile (pat, &buf); - - len = strlen (str); - - ret = re_search_2 (&buf, NULL, 0, str, len, 0, len, NULL, len); - - if (ret != correct_startpos) - simple_fail ("match", pat, &buf, str, itoa (ret)); -} - -/* Past bugs people have reported. */ - -void -test_regress () -{ - extern char upcase[]; - struct re_pattern_buffer buf; - unsigned len; - struct re_registers regs; - int ret; - char *fastmap = xmalloc (256); - - buf.translate = NULL; - buf.fastmap = NULL; - buf.allocated = 0; - buf.buffer = NULL; - - printf ("\nStarting regression tests.\n"); - t = regress_test; - - test_should_match = true; - re_set_syntax (RE_SYNTAX_EMACS); - - /* enami@sys.ptg.sony.co.jp 10 Nov 92 15:19:02 JST */ - buf.translate = upcase; - SIMPLE_MATCH ("[A-[]", "A"); - buf.translate = NULL; - - /* meyering@cs.utexas.edu Nov 6 22:34:41 1992 */ - simple_search ("\\w+", "a", 0); - - /* jimb@occs.cs.oberlin.edu 10 Sep 92 00:42:33 */ - buf.translate = upcase; - SIMPLE_MATCH ("[\001-\377]", "\001"); - SIMPLE_MATCH ("[\001-\377]", "a"); - SIMPLE_MATCH ("[\001-\377]", "\377"); - buf.translate = NULL; - - /* mike@skinner.cs.uoregon.edu 1 Sep 92 01:45:22 */ - SIMPLE_MATCH ("^^$", "^"); - - /* pclink@qld.tne.oz.au Sep 7 22:42:36 1992 */ - re_set_syntax (RE_INTERVALS); - SIMPLE_MATCH ("^a\\{3\\}$", "aaa"); - SIMPLE_NONMATCH ("^a\\{3\\}$", "aa"); - re_set_syntax (RE_SYNTAX_EMACS); - - /* pclink@qld.tne.oz.au, 31 Aug 92. (conjecture) */ - re_set_syntax (RE_INTERVALS); - simple_search ("a\\{1,3\\}b", "aaab", 0); - simple_search ("a\\{1,3\\}b", "aaaab", 1); - re_set_syntax (RE_SYNTAX_EMACS); - - /* trq@dionysos.thphys.ox.ac.uk, 31 Aug 92. (simplified) */ - simple_fastmap ("^.*\n[ ]*"); - - /* wind!greg@plains.NoDak.edu, 25 Aug 92. 
(simplified) */ - re_set_syntax (RE_INTERVALS); - SIMPLE_MATCH ("[a-zA-Z]*.\\{5\\}", "xN0000"); - SIMPLE_MATCH ("[a-zA-Z]*.\\{5\\}$", "systemxN0000"); - SIMPLE_MATCH ("\\([a-zA-Z]*\\).\\{5\\}$", "systemxN0000"); - re_set_syntax (RE_SYNTAX_EMACS); - - /* jimb, 18 Aug 92. Don't use \000, so `strlen' (in our testing - routines) will work. (This still tickles the bug jimb reported.) */ - SIMPLE_MATCH ("[\001-\377]", "\001"); - SIMPLE_MATCH ("[\001-\377]", "a"); - SIMPLE_MATCH ("[\001-\377]", "\377"); - - /* jimb, 13 Aug 92. */ - SIMPLE_MATCH ("[\001-\177]", "\177"); - - /* Tests based on bwoelfel's below. */ - SIMPLE_MATCH ("\\(a\\|ab\\)*", "aab"); - SIMPLE_MATCH ("\\(a\\|ab\\)+", "aab"); - SIMPLE_MATCH ("\\(a*\\|ab\\)+", "aab"); - SIMPLE_MATCH ("\\(a+\\|ab\\)+", "aab"); - SIMPLE_MATCH ("\\(a?\\|ab\\)+", "aab"); - - /* bwoelfel@widget.seas.upenn.edu, 25 Jul 92. */ - SIMPLE_MATCH ("^\\([ab]+\\|bc\\)+", "abc"); - - /* jla, 3 Jul 92. Core dump in re_search_2. */ - buf.fastmap = fastmap; - buf.translate = upcase; -#define DATEDUMP_PATTERN " *[0-9]*:" - if (re_compile_pattern (DATEDUMP_PATTERN, strlen (DATEDUMP_PATTERN), &buf) - != NULL) - printf ("date dump compile failed.\n"); - regs.num_regs = 0; - regs.start = regs.end = NULL; - if (re_search_2 (&buf, NULL, 0, "Thu Jul 2 18:34:18 1992", - 24, 3, 21, &regs, 24) != 10) - printf ("date dump search failed.\n"); - buf.fastmap = 0; - buf.translate = 0; - - - /* rms, 4 Jul 1992. Pattern is much slower in Emacs 19. Fastmap - should be only a backslash. */ -#define BEGINEND_PATTERN "\\(\\\\begin\\s *{\\)\\|\\(\\\\end\\s *{\\)" - test_fastmap (BEGINEND_PATTERN, "\\", false, 0); - - - /* kaoru@is.s.u-tokyo.ac.jp, 27 Jun 1992. Code for [a-z] (in regex.c) - should translate the whole set. 
*/ - buf.translate = upcase; -#define CASE_SET_PATTERN "[ -`]" - if (re_compile_pattern (CASE_SET_PATTERN, strlen (CASE_SET_PATTERN), &buf) - != NULL) - printf ("case set compile failed.\n"); - if (re_match_2 (&buf, "K", 1, "", 0, 0, NULL, 1) != 1) - printf ("case set match failed.\n"); - -#define CASE_SET_PATTERN2 "[`-|]" - if (re_compile_pattern (CASE_SET_PATTERN2, strlen (CASE_SET_PATTERN2), &buf) - != NULL) - printf ("case set2 compile failed.\n"); - if (re_match_2 (&buf, "K", 1, "", 0, 0, NULL, 1) != 1) - printf ("case set2 match failed.\n"); - - buf.translate = NULL; - - - /* jimb, 27 Jun 92. Problems with gaps in the string. */ -#define GAP_PATTERN "x.*y.*z" - if (re_compile_pattern (GAP_PATTERN, strlen (GAP_PATTERN), &buf) != NULL) - printf ("gap didn't compile.\n"); - if (re_match_2 (&buf, "x-", 2, "y-z-", 4, 0, NULL, 6) != 5) - printf ("gap match failed.\n"); - - - /* jimb, 19 Jun 92. Since `beginning of word' matches at the - beginning of the string, then searching ought to find it there. - If `re_compile_fastmap' is not called, then it works ok. */ - buf.fastmap = fastmap; -#define BOW_BEG_PATTERN "\\<" - if (re_compile_pattern (BOW_BEG_PATTERN, strlen (BOW_BEG_PATTERN), &buf) - != NULL) - printf ("begword-begstring didn't compile.\n"); - if (re_search (&buf, "foo", 3, 0, 3, NULL) != 0) - printf ("begword-begstring search failed.\n"); - - /* Same bug report, different null-matching pattern. */ -#define EMPTY_ANCHOR_PATTERN "^$" - if (re_compile_pattern (EMPTY_ANCHOR_PATTERN, strlen (EMPTY_ANCHOR_PATTERN), - &buf) != NULL) - printf ("empty anchor didn't compile.\n"); - if (re_search (&buf, "foo\n\nbar", 8, 0, 8, NULL) != 4) - printf ("empty anchor search failed.\n"); - - /* jimb@occs.cs.oberlin.edu, 21 Apr 92. After we first allocate - registers for a particular re_pattern_buffer, we might have to - reallocate more registers on subsequent calls -- and we should be - reusing the same memory. 
*/ -#define ALLOC_REG_PATTERN "\\(abc\\)" - free (buf.fastmap); - buf.fastmap = 0; - if (re_compile_pattern (ALLOC_REG_PATTERN, strlen (ALLOC_REG_PATTERN), &buf) - != NULL) - printf ("register allocation didn't compile.\n"); - if (re_match (&buf, "abc", 3, 0, &regs) != 3) - printf ("register allocation didn't match.\n"); - if (regs.start[1] != 0 || regs.end[1] != 3) - printf ("register allocation reg #1 wrong.\n"); - - { - int *old_regstart = regs.start; - int *old_regend = regs.end; - - if (re_match (&buf, "abc", 3, 0, &regs) != 3) - printf ("register reallocation didn't match.\n"); - if (regs.start[1] != 0 || regs.end[1] != 3 - || old_regstart[1] != 0 || old_regend[1] != 3 - || regs.start != old_regstart || regs.end != old_regend) - printf ("register reallocation registers wrong.\n"); - } - - /* jskudlarek@std.MENTORG.COM, 21 Apr 92 (string-match). */ -#define JSKUD_PATTERN "[^/]+\\(/[^/.]+\\)?/[0-9]+$" - if (re_compile_pattern (JSKUD_PATTERN, strlen (JSKUD_PATTERN), &buf) != NULL) - printf ("jskud test didn't compile.\n"); - if (re_search (&buf, "a/1", 3, 0, 3, &regs) != 0) - printf ("jskud test didn't match.\n"); - if (regs.start[1] != -1 || regs.end[1] != -1) - printf ("jskud test, reg #1 wrong.\n"); - - /* jla's bug (with string-match), 5 Feb 92. */ - TEST_SEARCH ("\\`[ \t\n]*", "jla@challenger (Joseph Arceneaux)", 0, 100); - - /* jwz@lucid.com, 8 March 1992 (re-search-forward). (His is the - second.) These are not supposed to match. */ -#if 0 - /* This one fails quickly, because we can change the maybe_pop_jump - from the + to a pop_failure_pop, because of the c's. 
*/ - TEST_SEARCH ("^\\(To\\|CC\\):\\([^c]*\\)+co", -"To: hbs%titanic@lucid.com (Harlan Sexton)\n\ -Cc: eb@thalidomide, jlm@thalidomide\n\ -Subject: Re: so is this really as horrible an idea as it seems to me?\n\ -In-Reply-To: Harlan Sexton's message of Sun 8-Mar-92 11:00:06 PST <9203081900.AA24794@titanic.lucid>\n\ -References: <9203080736.AA05869@thalidomide.lucid>\n\ - <9203081900.AA24794@titanic.lucid>", 0, 5000); - - /* This one takes a long, long time to complete, because we have to - keep the failure points around because we might backtrack. */ - TEST_SEARCH ("^\\(To\\|CC\\):\\(.*\n.*\\)+co", - /* "X-Windows: The joke that kills.\n\ -FCC: /u/jwz/VM/inbox\n\ -From: Jamie Zawinski <jwz@lucid.com>\n\ */ -"To: hbs%titanic@lucid.com (Harlan Sexton)\n\ -Cc: eb@thalidomide, jlm@thalidomide\n\ -Subject: Re: so is this really as horrible an idea as it seems to me?\n\ -In-Reply-To: Harlan Sexton's message of Sun 8-Mar-92 11:00:06 PST <9203081900.AA24794@titanic.lucid>\n\ -References: <9203080736.AA05869@thalidomide.lucid>\n\ - <9203081900.AA24794@titanic.lucid>", 0, 5000); -#endif /* 0 [failed searches] */ - - - /* macrakis' bugs. 
*/ - buf.translate = upcase; /* message of 24 Jan 91 */ - if (re_compile_pattern ("[!-`]", 5, &buf) != NULL) - printf ("Range test didn't compile.\n"); - if (re_match (&buf, "A", 1, 0, NULL) != 1) - printf ("Range test #1 didn't match.\n"); - if (re_match (&buf, "a", 1, 0, NULL) != 1) - printf ("Range test #2 didn't match.\n"); - - buf.translate = 0; -#define FAO_PATTERN "\\(f\\(.\\)o\\)+" - if (re_compile_pattern (FAO_PATTERN, strlen (FAO_PATTERN), &buf) != NULL) - printf ("faofdx test didn't compile.\n"); - if (re_search (&buf, "faofdx", 6, 0, 6, &regs) != 0) - printf ("faofdx test didn't match.\n"); - if (regs.start[1] != 0 || regs.end[1] != 3) - printf ("faofdx test, reg #1 wrong.\n"); - if (regs.start[2] != 1 || regs.end[2] != 2) - printf ("faofdx test, reg #2 wrong.\n"); - - TEST_REGISTERS ("\\(a\\)*a", "aaa", 0, 3, 1, 2, -1, -1); - test_fastmap ("^\\([^ \n]+:\n\\)+\\([^ \n]+:\\)", " \n", 1, 0); - - /* 40 lines, 48 a's in each line. */ - test_match ("^\\([^ \n]+:\n\\)+\\([^ \n]+:\\)", - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ 
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:"); - - /* 640 a's followed by one b, twice. 
*/ - test_match ("\\(.*\\)\\1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"); - - /* 640 a's followed by two b's, twice. 
*/ - test_match ("\\(.*\\)\\1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb"); - - - /* Dave G. bug: Reference to a subexpression which didn't match. - Should fail. 
*/ - re_set_syntax (RE_NO_BK_PARENS | RE_NO_BK_VBAR); - test_match ("(ooooooooooone())-annnnnnnnnnnd-(twooooooooooo\\2)", - "ooooooooooone-annnnnnnnnnnd-twooooooooooo"); - test_match ("(o|t)", "o"); - test_match ("(o()|t)", "o"); - test_match ("(o|t)", "o"); - test_match ("(ooooooooooooooo|tttttttttttttttt())", "ooooooooooooooo"); - test_match ("(o|t())", "o"); - test_match ("(o()|t())", "o"); - test_match ("(ooooooooooooooooooooooooone()|twooooooooooooooooooooooooo())", "ooooooooooooooooooooooooone"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "o-a-t"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "t-a-f"); - - test_should_match = 0; - test_match ("(foo(bar)|second)\\2", "second"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "t-a-t"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "o-a-f"); - - re_set_syntax (RE_SYNTAX_EMACS); - test_match ("\\(foo\\(bar\\)\\|second\\)\\2", "secondbar"); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "one-and-four"); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "two-and-three"); - - test_should_match = 1; - re_set_syntax (RE_SYNTAX_EMACS); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "one-and-three"); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "two-and-four"); - - TEST_REGISTERS (":\\(.*\\)", ":/", 0, 2, 1, 2, -1, -1); - - /* Bug with `upcase' translation table, from Nico Josuttis - <nico@bredex.de> */ - test_should_match = 1; - test_case_fold ("[a-a]", "a"); - - printf ("\nFinished regression tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/regex-0.12/test/upcase.c b/regex-0.12/test/upcase.c @@ -1,39 +0,0 @@ -/* Indexed by a character, gives the upper case equivalent of the - character. 
*/ - -char upcase[0400] = - { 000, 001, 002, 003, 004, 005, 006, 007, - 010, 011, 012, 013, 014, 015, 016, 017, - 020, 021, 022, 023, 024, 025, 026, 027, - 030, 031, 032, 033, 034, 035, 036, 037, - 040, 041, 042, 043, 044, 045, 046, 047, - 050, 051, 052, 053, 054, 055, 056, 057, - 060, 061, 062, 063, 064, 065, 066, 067, - 070, 071, 072, 073, 074, 075, 076, 077, - 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, - 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, - 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, - 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, - 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, - 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, - 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, - 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, - 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, - 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, - 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, - 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, - 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, - 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, - 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, - 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, - 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, - 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, - 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, - 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, - 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, - 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, - 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, - 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 - }; - - diff --git a/regex-0.12/test/xmalloc.c b/regex-0.12/test/xmalloc.c @@ -1,21 +0,0 @@ -#include <stdio.h> -extern char *malloc (); - -#ifndef NULL -#define NULL 0 -#endif - -void * -xmalloc (size) - unsigned size; -{ - char *new_mem = malloc (size); - - if (new_mem == NULL) - { - fprintf (stderr, "xmalloc: request for %u bytes failed.\n", size); - abort (); - } - - return new_mem; -}