MFB

23 years ago · 9fc9e4b2cf
22 changed files with 4370 additions and 3667 deletions
--- a/1
+++ b/1
@ -1,6 +1,7 @@
 PHP                                                                        NEWS
 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
 ?? Oct 2003, PHP 5 Beta 3
+- Upgraded PCRE library to version 4.5. (Andrei)
 - Dropped Windows 95 support. (Andi)
 - Moved extensions to PECL:
  . ext/crack (Jani, Derick)
--- a/ext/pcre/pcrelib/AUTHORS
+++ b/ext/pcre/pcrelib/AUTHORS
@ -3,4 +3,4 @@ Written by: Philip Hazel <ph10@cam.ac.uk>
 University of Cambridge Computing Service,
 Cambridge, England. Phone: +44 1223 334714.

-Copyright (c) 1997-2001 University of Cambridge
+Copyright (c) 1997-2003 University of Cambridge
--- a/ext/pcre/pcrelib/COPYING
+++ b/ext/pcre/pcrelib/COPYING
@ -9,7 +9,7 @@ Written by: Philip Hazel <ph10@cam.ac.uk>
 University of Cambridge Computing Service,
 Cambridge, England. Phone: +44 1223 334714.

-Copyright (c) 1997-2001 University of Cambridge
+Copyright (c) 1997-2003 University of Cambridge

 Permission is granted to anyone to use this software for any purpose on any
 computer system, and to redistribute it freely, subject to the following
--- a/ext/pcre/pcrelib/ChangeLog
+++ b/ext/pcre/pcrelib/ChangeLog
@ -1,6 +1,161 @@
 ChangeLog for PCRE
 ------------------

+Version 4.5 01-Dec-03
+---------------------
+
+ 1. There has been some re-arrangement of the code for the match() function so
+    that it can be compiled in a version that does not call itself recursively.
+    Instead, it keeps those local variables that need separate instances for
+    each "recursion" in a frame on the heap, and gets/frees frames whenever it
+    needs to "recurse". Keeping track of where control must go is done by means
+    of setjmp/longjmp. The whole thing is implemented by a set of macros that
+    hide most of the details from the main code, and operates only if
+    NO_RECURSE is defined while compiling pcre.c. If PCRE is built using the
+    "configure" mechanism, "--disable-stack-for-recursion" turns on this way of
+    operating.
+
+    To make it easier for callers to provide specially tailored get/free
+    functions for this usage, two new functions, pcre_stack_malloc, and
+    pcre_stack_free, are used. They are always called in strict stacking order,
+    and the size of block requested is always the same.
+
+    The PCRE_CONFIG_STACKRECURSE info parameter can be used to find out whether
+    PCRE has been compiled to use the stack or the heap for recursion. The
+    -C option of pcretest uses this to show which version is compiled.
+
+    A new data escape \S, is added to pcretest; it causes the amounts of store
+    obtained and freed by both kinds of malloc/free at match time to be added
+    to the output.
+
+ 2. Changed the locale test to use "fr_FR" instead of "fr" because that's
+    what's available on my current Linux desktop machine.
+
+ 3. When matching a UTF-8 string, the test for a valid string at the start has
+    been extended. If start_offset is not zero, PCRE now checks that it points
+    to a byte that is the start of a UTF-8 character. If not, it returns
+    PCRE_ERROR_BADUTF8_OFFSET (-11). Note: the whole string is still checked;
+    this is necessary because there may be backward assertions in the pattern.
+    When matching the same subject several times, it may save resources to use
+    PCRE_NO_UTF8_CHECK on all but the first call if the string is long.
+
+ 4. The code for checking the validity of UTF-8 strings has been tightened so
+    that it rejects (a) strings containing 0xfe or 0xff bytes and (b) strings
+    containing "overlong sequences".
+
+ 5. Fixed a bug (appearing twice) that I could not find any way of exploiting!
+    I had written "if ((digitab[*p++] && chtab_digit) == 0)" where the "&&"
+    should have been "&", but it just so happened that all the cases this let
+    through by mistake were picked up later in the function.
+
+ 6. I had used a variable called "isblank" - this is a C99 function, causing
+    some compilers to warn. To avoid this, I renamed it (as "blankclass").
+
+ 7. Cosmetic: (a) only output another newline at the end of pcretest if it is
+    prompting; (b) run "./pcretest /dev/null" at the start of the test script
+    so the version is shown; (c) stop "make test" echoing "./RunTest".
+
+ 8. Added patches from David Burgess to enable PCRE to run on EBCDIC systems.
+
+ 9. The prototype for memmove() for systems that don't have it was using
+    size_t, but the inclusion of the header that defines size_t was later. I've
+    moved the #includes for the C headers earlier to avoid this.
+
+10. Added some adjustments to the code to make it easier to compile on certain
+    special systems:
+
+      (a) Some "const" qualifiers were missing.
+      (b) Added the macro EXPORT before all exported functions; by default this
+          is defined to be empty.
+      (c) Changed the dftables auxiliary program (that builds chartables.c) so
+          that it reads its output file name as an argument instead of writing
+          to the standard output and assuming this can be redirected.
+
+11. In UTF-8 mode, if a recursive reference (e.g. (?1)) followed a character
+    class containing characters with values greater than 255, PCRE compilation
+    went into a loop.
+
+12. A recursive reference to a subpattern that was within another subpattern
+    that had a minimum quantifier of zero caused PCRE to crash. For example,
+    (x(y(?2))z)? provoked this bug with a subject that got as far as the
+    recursion. If the recursively-called subpattern itself had a zero repeat,
+    that was OK.
+
+13. In pcretest, the buffer for reading a data line was set at 30K, but the
+    buffer into which it was copied (for escape processing) was still set at
+    1024, so long lines caused crashes.
+
+14. A pattern such as /[ab]{1,3}+/ failed to compile, giving the error
+    "internal error: code overflow...". This applied to any character class
+    that was followed by a possessive quantifier.
+
+15. Modified the Makefile to add libpcre.la as a prerequisite for
+    libpcreposix.la because I was told this is needed for a parallel build to
+    work.
+
+16. If a pattern that contained .* following optional items at the start was
+    studied, the wrong optimizing data was generated, leading to matching
+    errors. For example, studying /[ab]*.*c/ concluded, erroneously, that any
+    matching string must start with a or b or c. The correct conclusion for
+    this pattern is that a match can start with any character.
+
+
+Version 4.4 13-Aug-03
+---------------------
+
+ 1. In UTF-8 mode, a character class containing characters with values between
+    127 and 255 was not handled correctly if the compiled pattern was studied.
+    In fixing this, I have also improved the studying algorithm for such
+    classes (slightly).
+
+ 2. Three internal functions had redundant arguments passed to them. Removal
+    might give a very teeny performance improvement.
+
+ 3. Documentation bug: the value of the capture_top field in a callout is *one
+    more than* the number of the highest numbered captured substring.
+
+ 4. The Makefile linked pcretest and pcregrep with -lpcre, which could result
+    in incorrectly linking with a previously installed version. They now link
+    explicitly with libpcre.la.
+
+ 5. configure.in no longer needs to recognize Cygwin specially.
+
+ 6. A problem in pcre.in for Windows platforms is fixed.
+
+ 7. If a pattern was successfully studied, and the -d (or /D) flag was given to
+    pcretest, it used to include the size of the study block as part of its
+    output. Unfortunately, the structure contains a field that has a different
+    size on different hardware architectures. This meant that the tests that
+    showed this size failed. As the block is currently always of a fixed size,
+    this information isn't actually particularly useful in pcretest output, so
+    I have just removed it.
+
+ 8. Three pre-processor statements accidentally did not start in column 1.
+    Sadly, there are *still* compilers around that complain, even though
+    standard C has not required this for well over a decade. Sigh.
+
+ 9. In pcretest, the code for checking callouts passed small integers in the
+    callout_data field, which is a void * field. However, some picky compilers
+    complained about the casts involved for this on 64-bit systems. Now
+    pcretest passes the address of the small integer instead, which should get
+    rid of the warnings.
+
+10. By default, when in UTF-8 mode, PCRE now checks for valid UTF-8 strings at
+    both compile and run time, and gives an error if an invalid UTF-8 sequence
+    is found. There is an option for disabling this check in cases where the
+    string is known to be correct and/or the maximum performance is wanted.
+
+11. In response to a bug report, I changed one line in Makefile.in from
+
+        -Wl,--out-implib,.libs/lib@WIN_PREFIX@pcreposix.dll.a \
+    to
+        -Wl,--out-implib,.libs/@WIN_PREFIX@libpcreposix.dll.a \
+
+    to look similar to other lines, but I have no way of telling whether this
+    is the right thing to do, as I do not use Windows. No doubt I'll get told
+    if it's wrong...
+
+
 Version 4.3 21-May-03
 ---------------------

--- a/ext/pcre/pcrelib/INSTALL
+++ b/ext/pcre/pcrelib/INSTALL
@ -1,185 +0,0 @@
-Basic Installation
-==================
-
-   These are generic installation instructions that apply to systems that
-can run the `configure' shell script - Unix systems and any that imitate
-it. They are not specific to PCRE. There are PCRE-specific instructions
-for non-Unix systems in the file NON-UNIX-USE.
-
-   The `configure' shell script attempts to guess correct values for
-various system-dependent variables used during compilation.  It uses
-those values to create a `Makefile' in each directory of the package.
-It may also create one or more `.h' files containing system-dependent
-definitions.  Finally, it creates a shell script `config.status' that
-you can run in the future to recreate the current configuration, a file
-`config.cache' that saves the results of its tests to speed up
-reconfiguring, and a file `config.log' containing compiler output
-(useful mainly for debugging `configure').
-
-   If you need to do unusual things to compile the package, please try
-to figure out how `configure' could check whether to do them, and mail
-diffs or instructions to the address given in the `README' so they can
-be considered for the next release.  If at some point `config.cache'
-contains results you don't want to keep, you may remove or edit it.
-
-   The file `configure.in' is used to create `configure' by a program
-called `autoconf'.  You only need `configure.in' if you want to change
-it or regenerate `configure' using a newer version of `autoconf'.
-
-The simplest way to compile this package is:
-
-  1. `cd' to the directory containing the package's source code and type
-     `./configure' to configure the package for your system.  If you're
-     using `csh' on an old version of System V, you might need to type
-     `sh ./configure' instead to prevent `csh' from trying to execute
-     `configure' itself.
-
-     Running `configure' takes awhile.  While running, it prints some
-     messages telling which features it is checking for.
-
-  2. Type `make' to compile the package.
-
-  3. Optionally, type `make check' to run any self-tests that come with
-     the package.
-
-  4. Type `make install' to install the programs and any data files and
-     documentation.
-
-  5. You can remove the program binaries and object files from the
-     source code directory by typing `make clean'.  To also remove the
-     files that `configure' created (so you can compile the package for
-     a different kind of computer), type `make distclean'.  There is
-     also a `make maintainer-clean' target, but that is intended mainly
-     for the package's developers.  If you use it, you may have to get
-     all sorts of other programs in order to regenerate files that came
-     with the distribution.
-
-Compilers and Options
-=====================
-
-   Some systems require unusual options for compilation or linking that
-the `configure' script does not know about.  You can give `configure'
-initial values for variables by setting them in the environment.  Using
-a Bourne-compatible shell, you can do that on the command line like
-this:
-     CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure
-
-Or on systems that have the `env' program, you can do it like this:
-     env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure
-
-Compiling For Multiple Architectures
-====================================
-
-   You can compile the package for more than one kind of computer at the
-same time, by placing the object files for each architecture in their
-own directory.  To do this, you must use a version of `make' that
-supports the `VPATH' variable, such as GNU `make'.  `cd' to the
-directory where you want the object files and executables to go and run
-the `configure' script.  `configure' automatically checks for the
-source code in the directory that `configure' is in and in `..'.
-
-   If you have to use a `make' that does not supports the `VPATH'
-variable, you have to compile the package for one architecture at a time
-in the source code directory.  After you have installed the package for
-one architecture, use `make distclean' before reconfiguring for another
-architecture.
-
-Installation Names
-==================
-
-   By default, `make install' will install the package's files in
-`/usr/local/bin', `/usr/local/man', etc.  You can specify an
-installation prefix other than `/usr/local' by giving `configure' the
-option `--prefix=PATH'.
-
-   You can specify separate installation prefixes for
-architecture-specific files and architecture-independent files.  If you
-give `configure' the option `--exec-prefix=PATH', the package will use
-PATH as the prefix for installing programs and libraries.
-Documentation and other data files will still use the regular prefix.
-
-   In addition, if you use an unusual directory layout you can give
-options like `--bindir=PATH' to specify different values for particular
-kinds of files.  Run `configure --help' for a list of the directories
-you can set and what kinds of files go in them.
-
-   If the package supports it, you can cause programs to be installed
-with an extra prefix or suffix on their names by giving `configure' the
-option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
-
-Optional Features
-=================
-
-   Some packages pay attention to `--enable-FEATURE' options to
-`configure', where FEATURE indicates an optional part of the package.
-They may also pay attention to `--with-PACKAGE' options, where PACKAGE
-is something like `gnu-as' or `x' (for the X Window System).  The
-`README' should mention any `--enable-' and `--with-' options that the
-package recognizes.
-
-   For packages that use the X Window System, `configure' can usually
-find the X include and library files automatically, but if it doesn't,
-you can use the `configure' options `--x-includes=DIR' and
-`--x-libraries=DIR' to specify their locations.
-
-Specifying the System Type
-==========================
-
-   There may be some features `configure' can not figure out
-automatically, but needs to determine by the type of host the package
-will run on.  Usually `configure' can figure that out, but if it prints
-a message saying it can not guess the host type, give it the
-`--host=TYPE' option.  TYPE can either be a short name for the system
-type, such as `sun4', or a canonical name with three fields:
-     CPU-COMPANY-SYSTEM
-
-See the file `config.sub' for the possible values of each field.  If
-`config.sub' isn't included in this package, then this package doesn't
-need to know the host type.
-
-   If you are building compiler tools for cross-compiling, you can also
-use the `--target=TYPE' option to select the type of system they will
-produce code for and the `--build=TYPE' option to select the type of
-system on which you are compiling the package.
-
-Sharing Defaults
-================
-
-   If you want to set default values for `configure' scripts to share,
-you can create a site shell script called `config.site' that gives
-default values for variables like `CC', `cache_file', and `prefix'.
-`configure' looks for `PREFIX/share/config.site' if it exists, then
-`PREFIX/etc/config.site' if it exists.  Or, you can set the
-`CONFIG_SITE' environment variable to the location of the site script.
-A warning: not all `configure' scripts look for a site script.
-
-Operation Controls
-==================
-
-   `configure' recognizes the following options to control how it
-operates.
-
-`--cache-file=FILE'
-     Use and save the results of the tests in FILE instead of
-     `./config.cache'.  Set FILE to `/dev/null' to disable caching, for
-     debugging `configure'.
-
-`--help'
-     Print a summary of the options to `configure', and exit.
-
-`--quiet'
-`--silent'
-`-q'
-     Do not print messages saying which checks are being made.  To
-     suppress all normal output, redirect it to `/dev/null' (any error
-     messages will still be shown).
-
-`--srcdir=DIR'
-     Look for the package's source code in directory DIR.  Usually
-     `configure' can determine that directory automatically.
-
-`--version'
-     Print the version of Autoconf used to generate the `configure'
-     script, and exit.
-
-`configure' also accepts some other, not widely useful, options.
--- a/ext/pcre/pcrelib/LICENCE
+++ b/ext/pcre/pcrelib/LICENCE
@ -9,7 +9,7 @@ Written by: Philip Hazel <ph10@cam.ac.uk>
 University of Cambridge Computing Service,
 Cambridge, England. Phone: +44 1223 334714.

-Copyright (c) 1997-2001 University of Cambridge
+Copyright (c) 1997-2003 University of Cambridge

 Permission is granted to anyone to use this software for any purpose on any
 computer system, and to redistribute it freely, subject to the following
--- a/ext/pcre/pcrelib/NEWS
+++ b/ext/pcre/pcrelib/NEWS
@ -1,6 +1,37 @@
 News about PCRE releases
 ------------------------

+Release 4.5 01-Dec-03
+---------------------
+
+Again mainly a bug-fix and tidying release, with only a couple of new features:
+
+1. It's possible now to compile PCRE so that it does not use recursive
+function calls when matching. Instead it gets memory from the heap. This slows
+things down, but may be necessary on systems with limited stacks.
+
+2. UTF-8 string checking has been tightened to reject overlong sequences and to
+check that a starting offset points to the start of a character. Failure of the
+latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
+
+3. PCRE can now be compiled for systems that use EBCDIC code.
+
+
+Release 4.4 21-Aug-03
+---------------------
+
+This is mainly a bug-fix and tidying release. The only new feature is that PCRE
+checks UTF-8 strings for validity by default. There is an option to suppress
+this, just in case anybody wants that teeny extra bit of performance.
+
+
+Releases 4.1 - 4.3
+------------------
+
+Sorry, I forgot about updating the NEWS file for these releases. Please take a
+look at ChangeLog.
+
+
 Release 4.0 17-Feb-03
 ---------------------

--- a/ext/pcre/pcrelib/NON-UNIX-USE
+++ b/ext/pcre/pcrelib/NON-UNIX-USE
@ -1,12 +1,19 @@
 Compiling PCRE on non-Unix systems
 ----------------------------------

-See below for comments on Cygwin or MinGW usage.
+See below for comments on Cygwin or MinGW usage. I (Philip Hazel) have no
+knowledge of Windows systems and how their libraries work. The items in the
+PCRE Makefile that relate to anything other than Unix-like systems have been
+contributed by PCRE users. There are some other comments and files in the
+Contrib directory on the ftp site that you may find useful.

-If you want to compile PCRE for a non-Unix system, note that it consists
-entirely of code written in Standard C, and so should compile successfully
-on any machine with a Standard C compiler and library, using normal compiling
-commands to do the following:
+The following are generic comments about building PCRE:
+
+If you want to compile PCRE for a non-Unix system (or perhaps, more strictly,
+for a system that does not support "configure" and make files), note that PCRE
+consists entirely of code written in Standard C, and so should compile
+successfully on any machine with a Standard C compiler and library, using
+normal compiling commands to do the following:

 (1) Copy or rename the file config.in as config.h, and change the macros that
 define HAVE_STRERROR and HAVE_MEMMOVE to define them as 1 rather than 0.
@ -21,15 +28,17 @@ for PCRE_MAJOR, PCRE_MINOR, and PCRE_DATE near its start to the values set in
 configure.in.

 (3) Compile dftables.c as a stand-alone program, and then run it with
-the standard output sent to chartables.c. This generates a set of standard
-character tables.
+the single argument "chartables.c". This generates a set of standard
+character tables and writes them to that file.

 (4) Compile maketables.c, get.c, study.c and pcre.c and link them all
 together into an object library in whichever form your system keeps such
 libraries. This is the pcre library (chartables.c is included by means of an
-#include directive).
+#include directive). If your system has static and shared libraries, you may
+have to do this once for each type.

-(5) Similarly, compile pcreposix.c and link it as the pcreposix library.
+(5) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
+library.

 (6) Compile the test program pcretest.c. This needs the functions in the
 pcre and pcreposix libraries when linking.
@ -79,7 +88,7 @@ These are some further comments about Win32 builds from Mark Evans. They
 were contributed before Fred Cox's changes were made, so it is possible that
 they may no longer be relevant.

-The documentation for Win32 builds is a bit shy.  Under MSVC6 I
+"The documentation for Win32 builds is a bit shy.  Under MSVC6 I
 followed their instructions to the letter, but there were still
 some things missing.

@ -89,7 +98,7 @@ some things missing.

 (2) Missing some #ifdefs relating to the function pointers
    pcre_malloc and pcre_free.  See my solution below.  (The stubs
-    may not be mandatory but they made me feel better.)
+    may not be mandatory but they made me feel better.)"

 =========================
 #ifdef _WIN32
--- a/ext/pcre/pcrelib/README
+++ b/ext/pcre/pcrelib/README
@ -16,6 +16,11 @@ regex.h, but I didn't want to risk possible problems with existing files of
 that name by distributing it that way. To use it with an existing program that
 uses the POSIX API, it will have to be renamed or pointed at by a link.

+If you are using the POSIX interface to PCRE and there is already a POSIX regex
+library installed on your system, you must take care when linking programs to
+ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
+up the "real" POSIX functions of the same name.
+

 Contributions by users of PCRE
 ------------------------------
@ -96,6 +101,16 @@ library. You can read more about them in the pcrebuild man page.
  is a representation of the compiled pattern, and this changes with the link
  size.

+. You can build PCRE so that its match() function does not call itself
+  recursively. Instead, it uses blocks of data from the heap via special
+  functions pcre_stack_malloc() and pcre_stack_free() to save data that would
+  otherwise be saved on the stack. To build PCRE like this, use
+
+  --disable-stack-for-recursion
+
+  on the "configure" command. PCRE runs more slowly in this mode, but it may be
+  necessary in environments with limited stack sizes.
+
 The "configure" script builds five files:

 . libtool is a script that builds shared and/or static libraries
@ -125,16 +140,6 @@ included in makefiles for programs that use PCRE, saving the programmer from
 having to remember too many details.


-Cross-compiling PCRE on a Unix-like system
------------------------------------------
-
-PCRE needs to compile and run an auxiliary program as part of the building
-process. Obviously, if the real compilation is for some other system, it can't
-use the same CC and CFLAGS values when it is doing this. For cross compilation,
-therefore, you must set CC_FOR_BUILD to the local host's compiler, and you can
-set flags in CFLAGS_FOR_BUILD if you need to.
-
-
 Shared libraries on Unix-like systems
 -------------------------------------

@ -169,17 +174,20 @@ order to cross-compile PCRE for some other host. However, during the building
 process, the dftables.c source file is compiled *and run* on the local host, in
 order to generate the default character tables (the chartables.c file). It
 therefore needs to be compiled with the local compiler, not the cross compiler.
-You can do this by specifying HOST_CC (and if necessary HOST_CFLAGS) when
-calling the "configure" command. If they are not specified, they default to the
-values of CC and CFLAGS.
+You can do this by specifying CC_FOR_BUILD (and if necessary CFLAGS_FOR_BUILD)
+when calling the "configure" command. If they are not specified, they default
+to the values of CC and CFLAGS.


 Building on non-Unix systems
 ----------------------------

-For a non-Unix system, read the comments in the file NON-UNIX-USE. PCRE has
-been compiled on Windows systems and on Macintoshes, but I don't know the
-details because I don't use those systems. It should be straightforward to
+For a non-Unix system, read the comments in the file NON-UNIX-USE, though if
+the system supports the use of "configure" and "make" you may be able to build
+PCRE in the same way as for Unix systems.
+
+PCRE has been compiled on Windows systems and on Macintoshes, but I don't know
+the details because I don't use those systems. It should be straightforward to
 build PCRE on any system that has a Standard C compiler, because it uses only
 Standard C functions.

@ -189,7 +197,7 @@ Testing PCRE

 To test PCRE on a Unix system, run the RunTest script that is created by the
 configuring process. (This can also be run by "make runtest", "make check", or
-"make test".) For other systems, see the instruction in NON-UNIX-USE.
+"make test".) For other systems, see the instructions in NON-UNIX-USE.

 The script runs the pcretest test program (which is documented in its own man
 page) on each of the testinput files (in the testdata directory) in turn,
@ -222,13 +230,13 @@ bug in PCRE.

 The third set of tests checks pcre_maketables(), the facility for building a
 set of character tables for a specific locale and using them instead of the
-default tables. The tests make use of the "fr" (French) locale. Before running
-the test, the script checks for the presence of this locale by running the
-"locale" command. If that command fails, or if it doesn't include "fr" in the
-list of available locales, the third test cannot be run, and a comment is
-output to say why. If running this test produces instances of the error
+default tables. The tests make use of the "fr_FR" (French) locale. Before
+running the test, the script checks for the presence of this locale by running
+the "locale" command. If that command fails, or if it doesn't include "fr_FR"
+in the list of available locales, the third test cannot be run, and a comment
+is output to say why. If running this test produces instances of the error

-  ** Failed to set locale "fr"
+  ** Failed to set locale "fr_FR"

 in the comparison output, it means that locale is not available on your system,
 despite being listed by "locale". This does not mean that PCRE is broken.
@ -354,4 +362,4 @@ The distribution should contain the following files:
  makevp.bat

 Philip Hazel <ph10@cam.ac.uk>
-February 2003
+December 2003
--- a/ext/pcre/pcrelib/dftables.c
+++ b/ext/pcre/pcrelib/dftables.c
@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language.

 Written by: Philip Hazel <ph10@cam.ac.uk>

-           Copyright (c) 1997-2001 University of Cambridge
+           Copyright (c) 1997-2003 University of Cambridge

 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@ -50,69 +50,83 @@ order to be consistent. */
 #include "maketables.c"


-int main(void)
+int main(int argc, char **argv)
 {
 int i;
+FILE *f;
 const unsigned char *tables = pcre_maketables();

-/* There are two printf() calls here, because gcc in pedantic mode complains
+if (argc != 2)
+  {
+  fprintf(stderr, "dftables: one filename argument is required\n");
+  return 1;
+  }
+
+f = fopen(argv[1], "w");
+if (f == NULL)
+  {
+  fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
+  return 1;
+  }
+
+/* There are two fprintf() calls here, because gcc in pedantic mode complains
 about the very long string otherwise. */

-printf(
+fprintf(f,
  "/*************************************************\n"
  "*      Perl-Compatible Regular Expressions       *\n"
  "*************************************************/\n\n"
  "/* This file is automatically written by the dftables auxiliary \n"
  "program. If you edit it by hand, you might like to edit the Makefile to \n"
  "prevent its ever being regenerated.\n\n");
-printf(
+fprintf(f,
  "This file is #included in the compilation of pcre.c to build the default\n"
  "character tables which are used when no tables are passed to the compile\n"
  "function. */\n\n"
  "static unsigned char pcre_default_tables[] = {\n\n"
  "/* This table is a lower casing table. */\n\n");

-printf("  ");
+fprintf(f, "  ");
 for (i = 0; i < 256; i++)
  {
-  if ((i & 7) == 0 && i != 0) printf("\n  ");
-  printf("%3d", *tables++);
-  if (i != 255) printf(",");
+  if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
+  fprintf(f, "%3d", *tables++);
+  if (i != 255) fprintf(f, ",");
  }
-printf(",\n\n");
+fprintf(f, ",\n\n");

-printf("/* This table is a case flipping table. */\n\n");
+fprintf(f, "/* This table is a case flipping table. */\n\n");

-printf("  ");
+fprintf(f, "  ");
 for (i = 0; i < 256; i++)
  {
-  if ((i & 7) == 0 && i != 0) printf("\n  ");
-  printf("%3d", *tables++);
-  if (i != 255) printf(",");
+  if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
+  fprintf(f, "%3d", *tables++);
+  if (i != 255) fprintf(f, ",");
  }
-printf(",\n\n");
+fprintf(f, ",\n\n");

-printf(
+fprintf(f,
  "/* This table contains bit maps for various character classes.\n"
  "Each map is 32 bytes long and the bits run from the least\n"
  "significant end of each byte. The classes that have their own\n"
  "maps are: space, xdigit, digit, upper, lower, word, graph\n"
  "print, punct, and cntrl. Other classes are built from combinations. */\n\n");

-printf("  ");
+fprintf(f, "  ");
 for (i = 0; i < cbit_length; i++)
  {
  if ((i & 7) == 0 && i != 0)
    {
-    if ((i & 31) == 0) printf("\n");
-    printf("\n  ");
+    if ((i & 31) == 0) fprintf(f, "\n");
+    fprintf(f, "\n  ");
    }
-  printf("0x%02x", *tables++);
-  if (i != cbit_length - 1) printf(",");
+  fprintf(f, "0x%02x", *tables++);
+  if (i != cbit_length - 1) fprintf(f, ",");
  }
-printf(",\n\n");
+fprintf(f, ",\n\n");

-printf(
+fprintf(f,
  "/* This table identifies various classes of character by individual bits:\n"
  "  0x%02x   white space character\n"
  "  0x%02x   letter\n"
@ -123,29 +137,30 @@ printf(
  ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
  ctype_meta);

-printf("  ");
+fprintf(f, "  ");
 for (i = 0; i < 256; i++)
  {
  if ((i & 7) == 0 && i != 0)
    {
-    printf(" /* ");
-    if (isprint(i-8)) printf(" %c -", i-8);
-      else printf("%3d-", i-8);
-    if (isprint(i-1)) printf(" %c ", i-1);
-      else printf("%3d", i-1);
-    printf(" */\n  ");
+    fprintf(f, " /* ");
+    if (isprint(i-8)) fprintf(f, " %c -", i-8);
+      else fprintf(f, "%3d-", i-8);
+    if (isprint(i-1)) fprintf(f, " %c ", i-1);
+      else fprintf(f, "%3d", i-1);
+    fprintf(f, " */\n  ");
    }
-  printf("0x%02x", *tables++);
-  if (i != 255) printf(",");
+  fprintf(f, "0x%02x", *tables++);
+  if (i != 255) fprintf(f, ",");
  }

-printf("};/* ");
-if (isprint(i-8)) printf(" %c -", i-8);
-  else printf("%3d-", i-8);
-if (isprint(i-1)) printf(" %c ", i-1);
-  else printf("%3d", i-1);
-printf(" */\n\n/* End of chartables.c */\n");
+fprintf(f, "};/* ");
+if (isprint(i-8)) fprintf(f, " %c -", i-8);
+  else fprintf(f, "%3d-", i-8);
+if (isprint(i-1)) fprintf(f, " %c ", i-1);
+  else fprintf(f, "%3d", i-1);
+fprintf(f, " */\n\n/* End of chartables.c */\n");

+fclose(f);
 return 0;
 }

--- a/ext/pcre/pcrelib/dll.mk
+++ b/ext/pcre/pcrelib/dll.mk
@ -1,60 +0,0 @@
-# dll.mk - auxilary Makefile to easy build dll's for mingw32 target
-# ver. 0.6 of 1999-03-25
-#
-# Homepage of this makefile - http://www.is.lg.ua/~paul/devel/
-# Homepage of original mingw32 project -
-#		      http://www.fu.is.saga-u.ac.jp/~colin/gcc.html
-#
-# How to use:
-# This makefile can:
-# 1. Create automatical .def file from list of objects
-# 2. Create .dll from objects and .def file, either automatical, or your
-#    hand-written (maybe) file, which must have same basename as dll
-# WARNING! There MUST be object, which name match dll's name. Make sux.
-# 3. Create import library from .def (as for .dll, only its name required,
-#    not dll itself)
-#    By convention implibs for dll have .dll.a suffix, e.g. libstuff.dll.a
-#    Why not just libstuff.a? 'Cos that's name for static lib, ok?
-# Process divided into 3 phases because:
-# 1. Pre-existent .def possible
-# 2. Generating implib is enough time-consuming
-#
-# Variables:
-#   DLL_LDLIBS  - libs for linking dll
-#   DLL_LDFLAGS - flags for linking dll
-#
-# By using $(DLL_SUFFIX) instead of 'dll', e.g. stuff.$(DLL_SUFFIX)
-# you may help porting makefiles to other platforms
-#
-# Put this file in your make's include path (e.g. main include dir, for
-# more information see include section in make doc). Put in the beginning
-# of your own Makefile line "include dll.mk". Specify dependences, e.g.:
-#
-# Do all stuff in one step
-# libstuff.dll.a: $(OBJECTS) stuff.def
-# stuff.def: $(OBJECTS)
-#
-# Steps separated, pre-provided .def, link with user32
-#
-# DLL_LDLIBS=-luser32
-# stuff.dll: $(OBJECTS)
-# libstuff.dll.a: $(OBJECTS)
-
-
-DLLWRAP=dllwrap
-DLLTOOL=dlltool
-
-DLL_SUFFIX=dll
-
-.SUFFIXES: .o .$(DLL_SUFFIX)
-
-_%.def: %.o
-      $(DLLTOOL) --export-all --output-def $@ $^
-
-%.$(DLL_SUFFIX): %.o
-      $(DLLWRAP) --dllname $(notdir $@) --driver-name $(CC) --def $*.def -o $@ $(filter %.o,$^) $(DLL_LDFLAGS) $(DLL_LDLIBS)
-
-lib%.$(DLL_SUFFIX).a:%.def
-      $(DLLTOOL) --dllname $(notdir $*.dll) --def $< --output-lib $@
-
-# End
--- a/ext/pcre/pcrelib/doc/Tech.Notes
+++ b/ext/pcre/pcrelib/doc/Tech.Notes
@ -48,7 +48,9 @@ These items are all just one byte long

  OP_END                 end of pattern
  OP_ANY                 match any character
+  OP_ANYBYTE             match any single byte, even in UTF-8 mode 
  OP_SOD                 match start of data: \A
+  OP_SOM                 start of match (subject + offset): \G
  OP_CIRC                ^ (start of data, or after \n in multiline)
  OP_NOT_WORD_BOUNDARY   \W
  OP_WORD_BOUNDARY       \w
@ -61,7 +63,6 @@ These items are all just one byte long
  OP_EODN                match end of data or \n at end: \Z
  OP_EOD                 match end of data: \z
  OP_DOLL                $ (end of data, or before \n in multiline)
-  OP_RECURSE             match the pattern recursively


 Repeating single characters
@ -119,8 +120,7 @@ instances of OP_CHARS are used.
 Character classes
 -----------------

-When characters less than 256 are involved, OP_CLASS is used for a character
-class. If there is only one character, OP_CHARS is used for a positive class,
+If there is only one character, OP_CHARS is used for a positive class,
 and OP_NOT for a negative one (that is, for something like [^a]). However, in 
 UTF-8 mode, this applies only to characters with values < 128, because OP_NOT 
 is confined to single bytes.
@ -129,9 +129,15 @@ Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a repeated,
 negated, single-character class. The normal ones (OP_STAR etc.) are used for a
 repeated positive single-character class.

-OP_CLASS is followed by a 32-byte bit map containing a 1 bit for every
-character that is acceptable. The bits are counted from the least significant
-end of each byte.
+When there's more than one character in a class and all the characters are less
+than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a negative 
+one. In either case, the opcode is followed by a 32-byte bit map containing a 1
+bit for every character that is acceptable. The bits are counted from the least
+significant end of each byte.
+
+The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 mode, 
+subject characters with values greater than 255 can be handled correctly. For
+OP_CLASS they don't match, whereas for OP_NCLASS they do.

 For classes containing characters with values > 255, OP_XCLASS is used. It
 optionally uses a bit map (if any characters lie within it), followed by a list
@ -243,6 +249,21 @@ same scheme is used, with a "reference number" of 0xffff. Otherwise, a
 conditional subpattern always starts with one of the assertions.


+Recursion
+---------
+
+Recursion either matches the current regex, or some subexpression. The opcode
+OP_RECURSE is followed by a value which is the offset to the starting bracket
+from the start of the whole pattern.
+
+
+Callout
+-------
+
+OP_CALLOUT is followed by one byte of data that holds a callout number in the 
+range 0 to 255.
+
+
 Changing options
 ----------------

@ -257,4 +278,4 @@ at compile time, and so does not cause anything to be put into the compiled
 data.

 Philip Hazel
-August 2002
+August 2003
--- a/ext/pcre/pcrelib/doc/pcre.txt
+++ b/ext/pcre/pcrelib/doc/pcre.txt
--- a/ext/pcre/pcrelib/internal.h
+++ b/ext/pcre/pcrelib/internal.h
@ -45,6 +45,10 @@ modules, but which are not relevant to the outside. */
 # include "php_config.h"
 #endif

+#ifndef PCRE_SPY
+#define PCRE_DEFINITION       /* Win32 __declspec(export) trigger for .dll */
+#endif
+
 /* The value of NEWLINE determines the newline character. The default is to
 leave it up to the compiler, but some sites want to force a particular value.
 On Unix systems, "configure" can be used to override this default. */
@ -65,6 +69,14 @@ default default. */
 #define MATCH_LIMIT 10000000
 #endif

+/* If you are compiling for a system that needs some magic to be inserted
+ * before the definition of an exported function, define this macro to contain
+ * the relevant magic. It appears at the start of every exported function. */
+                                                                                                                                
+#define EXPORT
+
+#include "pcre.h"
+
 /* When compiling for use with the Virtual Pascal compiler, these functions
 need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
 option on the command line. */
@ -83,6 +95,18 @@ neither (there some non-Unix environments where this is the case). This assumes
 that all calls to memmove are moving strings upwards in store, which is the
 case in PCRE. */

+/* Standard C headers plus the external interface definition. The only time
+setjmp and stdarg are used is when NO_RECURSE is set. */
+
+#include <ctype.h>
+#include <limits.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
 #if ! HAVE_MEMMOVE
 #undef  memmove        /* some systems may have a macro */
 #if HAVE_BCOPY
@ -177,21 +201,6 @@ capturing parenthesis numbers in back references. */
 #define PUT2INC(a,n,d)  PUT2(a,n,d), a += 2


-/* Standard C headers plus the external interface definition */
-
-#include <ctype.h>
-#include <limits.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef PCRE_SPY
-#define PCRE_DEFINITION       /* Win32 __declspec(export) trigger for .dll */
-#endif
-
-#include "pcre.h"
-
 /* In case there is no definition of offsetof() provided - though any proper
 Standard C system should have one. */

@ -224,10 +233,10 @@ time, run time or study time, respectively. */
 #define PUBLIC_OPTIONS \
  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
-   PCRE_NO_AUTO_CAPTURE)
+   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK)

 #define PUBLIC_EXEC_OPTIONS \
-  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY)
+  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK)

 #define PUBLIC_STUDY_OPTIONS 0   /* None defined */

@ -381,40 +390,40 @@ enum {
                           class - the difference is relevant only when a UTF-8
                           character > 255 is encountered. */

-  OP_XCLASS,         /* 56 Extended class for handling UTF-8 chars within the
+  OP_XCLASS,         /* 57 Extended class for handling UTF-8 chars within the
                           class. This does both positive and negative. */

-  OP_REF,            /* 57 Match a back reference */
-  OP_RECURSE,        /* 58 Match a numbered subpattern (possibly recursive) */
-  OP_CALLOUT,        /* 59 Call out to external function if provided */
+  OP_REF,            /* 58 Match a back reference */
+  OP_RECURSE,        /* 59 Match a numbered subpattern (possibly recursive) */
+  OP_CALLOUT,        /* 60 Call out to external function if provided */

-  OP_ALT,            /* 60 Start of alternation */
-  OP_KET,            /* 61 End of group that doesn't have an unbounded repeat */
-  OP_KETRMAX,        /* 62 These two must remain together and in this */
-  OP_KETRMIN,        /* 63 order. They are for groups the repeat for ever. */
+  OP_ALT,            /* 61 Start of alternation */
+  OP_KET,            /* 62 End of group that doesn't have an unbounded repeat */
+  OP_KETRMAX,        /* 63 These two must remain together and in this */
+  OP_KETRMIN,        /* 64 order. They are for groups that repeat for ever. */

  /* The assertions must come before ONCE and COND */

-  OP_ASSERT,         /* 64 Positive lookahead */
-  OP_ASSERT_NOT,     /* 65 Negative lookahead */
-  OP_ASSERTBACK,     /* 66 Positive lookbehind */
-  OP_ASSERTBACK_NOT, /* 67 Negative lookbehind */
-  OP_REVERSE,        /* 68 Move pointer back - used in lookbehind assertions */
+  OP_ASSERT,         /* 65 Positive lookahead */
+  OP_ASSERT_NOT,     /* 66 Negative lookahead */
+  OP_ASSERTBACK,     /* 67 Positive lookbehind */
+  OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */
+  OP_REVERSE,        /* 69 Move pointer back - used in lookbehind assertions */

  /* ONCE and COND must come after the assertions, with ONCE first, as there's
  a test for >= ONCE for a subpattern that isn't an assertion. */

-  OP_ONCE,           /* 69 Once matched, don't back up into the subpattern */
-  OP_COND,           /* 70 Conditional group */
-  OP_CREF,           /* 71 Used to hold an extraction string number (cond ref) */
+  OP_ONCE,           /* 70 Once matched, don't back up into the subpattern */
+  OP_COND,           /* 71 Conditional group */
+  OP_CREF,           /* 72 Used to hold an extraction string number (cond ref) */

-  OP_BRAZERO,        /* 72 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 73 order. */
+  OP_BRAZERO,        /* 73 These two must remain together and in this */
+  OP_BRAMINZERO,     /* 74 order. */

-  OP_BRANUMBER,      /* 74 Used for extracting brackets whose number is greater
+  OP_BRANUMBER,      /* 75 Used for extracting brackets whose number is greater
                           than can fit into an opcode. */

-  OP_BRA             /* 75 This and greater values are used for brackets that
+  OP_BRA             /* 76 This and greater values are used for brackets that
                           extract substrings up to a basic limit. After that,
                           use is made of OP_BRANUMBER. */
 };
@ -457,10 +466,10 @@ in UTF-8 mode. The code that uses this table must know about such things. */
  1, 1, 1, 1, 2, 1, 1,           /* Any, Anybyte, \Z, \z, Opt, ^, $        */ \
  2,                             /* Chars - the minimum length             */ \
  2,                             /* not                                    */ \
-  /* Positive single-char repeats                                          */ \
-  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** These are  */ \
-  4, 4, 4,                       /* upto, minupto, exact     ** minima     */ \
-  /* Negative single-char repeats                                          */ \
+  /* Positive single-char repeats                            ** These are  */ \
+  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \
+  4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \
+  /* Negative single-char repeats - only for chars < 256                   */ \
  2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
  4, 4, 4,                       /* NOT upto, minupto, exact               */ \
  /* Positive type repeats                                                 */ \
@ -552,6 +561,7 @@ just to accommodate the POSIX wrapper. */
 #define ERR41 "unrecognized character after (?P"
 #define ERR42 "syntax error after (?P"
 #define ERR43 "two named groups have the same name"
+#define ERR44 "invalid UTF-8 string"

 /* All character handling must be done as unsigned characters. Otherwise there
 are problems with top-bit-set characters and functions such as isspace().
@ -615,7 +625,7 @@ typedef struct branch_chain {
 call within the pattern. */

 typedef struct recursion_info {
-  struct recursion_info *prev;  /* Previous recursion record (or NULL) */
+  struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
  int group_num;                /* Number of group that was called */
  const uschar *after_call;     /* "Return value": points after the call in the expr */
  const uschar *save_start;     /* Old value of md->start_match */
@ -623,6 +633,16 @@ typedef struct recursion_info {
  int saved_max;                /* Number of saved offsets */
 } recursion_info;

+/* When compiling in a mode that doesn't use recursive calls to match(),
+a structure is used to remember local variables on the heap. It is defined in
+pcre.c, close to the match() function, so that it is easy to keep it in step
+with any changes of local variable. However, the pointer to the current frame
+must be saved in some "static" place over a longjmp(). We declare the
+structure here so that we can put a pointer in the match_data structure.
+NOTE: This isn't used for a "normal" compilation of pcre. */
+
+struct heapframe;
+
 /* Structure for passing "static" information around between the functions
 doing the matching, so that they are thread-safe. */

@ -650,6 +670,7 @@ typedef struct match_data {
  int    start_offset;          /* The start offset value */
  recursion_info *recursive;    /* Linked list of recursion data */
  void  *callout_data;          /* To pass back to callouts */
+  struct heapframe *thisframe;  /* Used only when compiling for no recursion */
 } match_data;

 /* Bit definitions for entries in the pcre_ctypes table. */
--- a/ext/pcre/pcrelib/maketables.c
+++ b/ext/pcre/pcrelib/maketables.c
@ -126,9 +126,13 @@ for (i = 0; i < 256; i++)
  if (isdigit(i)) x += ctype_digit;
  if (isxdigit(i)) x += ctype_xdigit;
  if (isalnum(i) || i == '_') x += ctype_word;
-  if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
-  *p++ = x;
-  }
+
+  /* Note: strchr includes the terminating zero in the characters it considers.
+  In this instance, that is ok because we want binary zero to be flagged as a
+  meta-character, which in this sense is any character that terminates a run
+  of data characters. */
+
+  if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }

 return yield;
 }
--- a/ext/pcre/pcrelib/pcre.c
+++ b/ext/pcre/pcrelib/pcre.c
--- a/ext/pcre/pcrelib/pcre.h
+++ b/ext/pcre/pcrelib/pcre.h
@ -13,8 +13,8 @@ make changes to pcre.in. */
 #include "php_compat.h"

 #define PCRE_MAJOR          4
-#define PCRE_MINOR          3
-#define PCRE_DATE           21-May-2003
+#define PCRE_MINOR          5
+#define PCRE_DATE           01-December-2003

 /* Win32 uses DLL by default */

@ -25,7 +25,7 @@ make changes to pcre.in. */
 #    endif
 #  else
 #    ifndef PCRE_STATIC
-#      define PCRE_DATA_SCOPE __declspec(dllimport)
+#      define PCRE_DATA_SCOPE extern __declspec(dllimport)
 #    endif
 #  endif
 #endif
@ -59,18 +59,21 @@ extern "C" {
 #define PCRE_NOTEMPTY           0x0400
 #define PCRE_UTF8               0x0800
 #define PCRE_NO_AUTO_CAPTURE    0x1000
+#define PCRE_NO_UTF8_CHECK      0x2000

 /* Exec-time and get/set-time error codes */

-#define PCRE_ERROR_NOMATCH        (-1)
-#define PCRE_ERROR_NULL           (-2)
-#define PCRE_ERROR_BADOPTION      (-3)
-#define PCRE_ERROR_BADMAGIC       (-4)
-#define PCRE_ERROR_UNKNOWN_NODE   (-5)
-#define PCRE_ERROR_NOMEMORY       (-6)
-#define PCRE_ERROR_NOSUBSTRING    (-7)
-#define PCRE_ERROR_MATCHLIMIT     (-8)
-#define PCRE_ERROR_CALLOUT        (-9)  /* Never used by PCRE itself */
+#define PCRE_ERROR_NOMATCH         (-1)
+#define PCRE_ERROR_NULL            (-2)
+#define PCRE_ERROR_BADOPTION       (-3)
+#define PCRE_ERROR_BADMAGIC        (-4)
+#define PCRE_ERROR_UNKNOWN_NODE    (-5)
+#define PCRE_ERROR_NOMEMORY        (-6)
+#define PCRE_ERROR_NOSUBSTRING     (-7)
+#define PCRE_ERROR_MATCHLIMIT      (-8)
+#define PCRE_ERROR_CALLOUT         (-9)  /* Never used by PCRE itself */
+#define PCRE_ERROR_BADUTF8        (-10)
+#define PCRE_ERROR_BADUTF8_OFFSET (-11)

 /* Request types for pcre_fullinfo() */

@ -94,6 +97,7 @@ extern "C" {
 #define PCRE_CONFIG_LINK_SIZE               2
 #define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD  3
 #define PCRE_CONFIG_MATCH_LIMIT             4
+#define PCRE_CONFIG_STACKRECURSE            5

 /* Bit flags for the pcre_extra structure */

@ -137,18 +141,23 @@ typedef struct pcre_callout_block {
 } pcre_callout_block;

 /* Indirection for store get and free functions. These can be set to
-alternative malloc/free functions if required. There is also an optional
-callout function that is triggered by the (?) regex item. Some magic is
-required for Win32 DLL; it is null on other OS. For Virtual Pascal, these
-have to be different again. */
+alternative malloc/free functions if required. Special ones are used in the
+non-recursive case for "frames". There is also an optional callout function
+that is triggered by the (?) regex item. Some magic is required for Win32 DLL;
+it is null on other OS. For Virtual Pascal, these have to be different again.
+*/

 #ifndef VPCOMPAT
 PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
 PCRE_DATA_SCOPE void  (*pcre_free)(void *);
+PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
+PCRE_DATA_SCOPE void  (*pcre_stack_free)(void *);
 PCRE_DATA_SCOPE int   (*pcre_callout)(pcre_callout_block *);
 #else   /* VPCOMPAT */
 extern void *pcre_malloc(size_t);
 extern void  pcre_free(void *);
+extern void *pcre_stack_malloc(size_t);
+extern void  pcre_stack_free(void *);
 extern int   pcre_callout(pcre_callout_block *);
 #endif  /* VPCOMPAT */

--- a/ext/pcre/pcrelib/pcregrep.c
+++ b/ext/pcre/pcrelib/pcregrep.c
@ -545,8 +545,8 @@ for (i = 1; i < argc; i++)
    }
  }

-pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
-hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
+pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
+hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));

 if (pattern_list == NULL || hints_list == NULL)
  {
--- a/ext/pcre/pcrelib/pcreposix.c
+++ b/ext/pcre/pcrelib/pcreposix.c
@ -43,14 +43,14 @@ restrictions:

 /* Corresponding tables of PCRE error messages and POSIX error codes. */

-static const char *estring[] = {
+static const char *const estring[] = {
  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
  ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
-  ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
+  ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
  ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
-  ERR41, ERR42, ERR43 };
+  ERR41, ERR42, ERR43, ERR44 };

-static int eint[] = {
+static const int eint[] = {
  REG_EESCAPE, /* "\\ at end of pattern" */
  REG_EESCAPE, /* "\\c at end of pattern" */
  REG_EESCAPE, /* "unrecognized character follows \\" */
@ -93,12 +93,13 @@ static int eint[] = {
  REG_BADPAT,  /* "recursive call could loop indefinitely" */
  REG_BADPAT,  /* "unrecognized character after (?P" */
  REG_BADPAT,  /* "syntax error after (?P" */
-  REG_BADPAT   /* "two named groups have the same name" */
+  REG_BADPAT,  /* "two named groups have the same name" */
+  REG_BADPAT   /* "invalid UTF-8 string" */
 };

 /* Table of texts corresponding to POSIX error codes */

-static const char *pstring[] = {
+static const char *const pstring[] = {
  "",                                /* Dummy for value 0 */
  "internal error",                  /* REG_ASSERT */
  "invalid repeat counts in {}",     /* BADBR      */
@ -144,7 +145,7 @@ return REG_ASSERT;
 *          Translate error code to string        *
 *************************************************/

-size_t
+EXPORT size_t
 regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
 {
 const char *message, *addmessage;
@ -179,7 +180,7 @@ return length + addlength;
 *           Free store held by a regex           *
 *************************************************/

-void
+EXPORT void
 regfree(regex_t *preg)
 {
 (pcre_free)(preg->re_pcre);
@ -202,7 +203,7 @@ Returns:      0 on success
              various non-zero codes on failure
 */

-int
+EXPORT int
 regcomp(regex_t *preg, const char *pattern, int cflags)
 {
 const char *errorptr;
@ -217,7 +218,7 @@ preg->re_erroffset = erroffset;

 if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);

-preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL);
+preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
 return 0;
 }

@ -235,7 +236,7 @@ ints. However, if the number of possible capturing brackets is small, use a
 block of store on the stack, to reduce the use of malloc/free. The threshold is
 in a macro that can be changed at configure time. */

-int
+EXPORT int
 regexec(const regex_t *preg, const char *string, size_t nmatch,
  regmatch_t pmatch[], int eflags)
 {
@ -264,8 +265,8 @@ if (nmatch > 0)
    }
  }

-rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options,
-  ovector, nmatch * 3);
+rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
+  0, options, ovector, nmatch * 3);

 if (rc == 0) rc = nmatch;    /* All captured slots were filled in */

@ -293,6 +294,9 @@ else
    case PCRE_ERROR_BADMAGIC: return REG_INVARG;
    case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
    case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
+    case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
+    case PCRE_ERROR_BADUTF8: return REG_INVARG;
+    case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
    default: return REG_ASSERT;
    }
  }
--- a/ext/pcre/pcrelib/pcretest.c
+++ b/ext/pcre/pcrelib/pcretest.c
@ -38,7 +38,7 @@ Makefile. */
 #define LOOPREPEAT 50000

 #define BUFFER_SIZE 30000
-#define DBUFFER_SIZE 1024
+#define DBUFFER_SIZE BUFFER_SIZE


 static FILE *outfile;
@ -48,11 +48,11 @@ static int callout_extra;
 static int callout_fail_count;
 static int callout_fail_id;
 static int first_callout;
+static int show_malloc;
 static int use_utf8;
 static size_t gotten_store;


-
 static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

@ -321,13 +321,16 @@ if (post_start > 0)
  }

 fprintf(outfile, "\n");
-
 first_callout = 0;

-if ((int)(cb->callout_data) != 0)
+if (cb->callout_data != NULL)
  {
-  fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
-  return (int)(cb->callout_data);
+  int callout_data = *((int *)(cb->callout_data));
+  if (callout_data != 0)
+    {
+    fprintf(outfile, "Callout data = %d\n", callout_data);
+    return callout_data;
+    }
  }

 return (cb->callout_number != callout_fail_id)? 0 :
@ -336,7 +339,7 @@ return (cb->callout_number != callout_fail_id)? 0 :


 /*************************************************
-*            Local malloc function               *
+*            Local malloc functions              *
 *************************************************/

 /* Alternative malloc function, to test functionality and show the size of the
@ -344,10 +347,37 @@ compiled re. */

 static void *new_malloc(size_t size)
 {
+void *block = malloc(size);
 gotten_store = size;
-return malloc(size);
+if (show_malloc)
+  fprintf(outfile, "malloc       %3d %p\n", (int)size, block); /* %d needs int */
+return block;
+}
+
+static void new_free(void *block)
+{
+if (show_malloc)
+  fprintf(outfile, "free             %p\n", block);
+free(block);
+}
+
+
+/* For recursion malloc/free, to test stacking calls */
+
+static void *stack_malloc(size_t size)
+{
+void *block = malloc(size);
+if (show_malloc)
+  fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block); /* %d needs int */
+return block;
 }

+static void stack_free(void *block)
+{
+if (show_malloc)
+  fprintf(outfile, "stack_free       %p\n", block);
+free(block);
+}


 /*************************************************
@ -397,8 +427,8 @@ unsigned char *dbuffer;
 /* Get buffers from malloc() so that Electric Fence will check their misuse
 when I am debugging. */

-buffer = malloc(BUFFER_SIZE);
-dbuffer = malloc(DBUFFER_SIZE);
+buffer = (unsigned char *)malloc(BUFFER_SIZE);
+dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);

 /* Static so that new_malloc can use it. */

@ -440,6 +470,8 @@ while (argc > 1 && argv[op][0] == '-')
    printf("  POSIX malloc threshold = %d\n", rc);
    (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
    printf("  Default match limit = %d\n", rc);
+    (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
+    printf("  Match recursion uses %s\n", rc? "stack" : "heap");
    exit(0);
    }
  else
@ -464,7 +496,7 @@ while (argc > 1 && argv[op][0] == '-')
 /* Get the store for the offsets vector, and remember what it was */

 size_offsets_max = size_offsets;
-offsets = malloc(size_offsets_max * sizeof(int));
+offsets = (int *)malloc(size_offsets_max * sizeof(int));
 if (offsets == NULL)
  {
  printf("** Failed to get %d bytes of memory for offsets vector\n",
@ -497,6 +529,9 @@ if (argc > 2)
 /* Set alternative malloc function */

 pcre_malloc = new_malloc;
+pcre_free = new_free;
+pcre_stack_malloc = stack_malloc;
+pcre_stack_free = stack_free;

 /* Heading line, then prompt for first regex if stdin */

@ -619,6 +654,7 @@ while (!done)
      case 'U': options |= PCRE_UNGREEDY; break;
      case 'X': options |= PCRE_EXTRA; break;
      case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
+      case '?': options |= PCRE_NO_UTF8_CHECK; break;

      case 'L':
      ppp = pp;
@ -787,7 +823,7 @@ while (!done)
        }

      if (get_options == 0) fprintf(outfile, "No options\n");
-        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
+        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
          ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
          ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
          ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
@ -796,7 +832,8 @@ while (!done)
          ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
          ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
          ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
-          ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
+          ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
+          ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");

      if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
        fprintf(outfile, "Case state changes\n");
@ -861,13 +898,17 @@ while (!done)
      else if (extra == NULL)
        fprintf(outfile, "Study returned NULL\n");

+      /* Don't output study size; at present it is in any case a fixed
+      value, but it varies, depending on the computer architecture, and
+      so messes up the test suite. */
+
      else if (do_showinfo)
        {
        size_t size;
        uschar *start_bits = NULL;
        new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
        new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
-        fprintf(outfile, "Study size = %d\n", size);
+        /* fprintf(outfile, "Study size = %d\n", size); */
        if (start_bits == NULL)
          fprintf(outfile, "No starting character set\n");
        else
@ -929,6 +970,7 @@ while (!done)
    callout_count = 0;
    callout_fail_count = 999999;
    callout_fail_id = -1;
+    show_malloc = 0;

    if (infile == stdin) printf("data> ");
    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
@ -1105,7 +1147,7 @@ while (!done)
          {
          size_offsets_max = n;
          free(offsets);
-          use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
+          use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
          if (offsets == NULL)
            {
            printf("** Failed to get %d bytes of memory for offsets vector\n",
@ -1117,9 +1159,17 @@ while (!done)
        if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
        continue;

+        case 'S':
+        show_malloc = 1;
+        continue;
+
        case 'Z':
        options |= PCRE_NOTEOL;
        continue;
+
+        case '?':
+        options |= PCRE_NO_UTF8_CHECK;
+        continue;
        }
      *q++ = c;
      }
@ -1136,7 +1186,7 @@ while (!done)
      int eflags = 0;
      regmatch_t *pmatch = NULL;
      if (use_size_offsets > 0)
-        pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
+        pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
      if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
      if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;

@ -1203,7 +1253,7 @@ while (!done)

        if (extra == NULL)
          {
-          extra = malloc(sizeof(pcre_extra));
+          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
          extra->flags = 0;
          }
        extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
@ -1242,11 +1292,11 @@ while (!done)
        {
        if (extra == NULL)
          {
-          extra = malloc(sizeof(pcre_extra));
+          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
          extra->flags = 0;
          }
        extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
-        extra->callout_data = (void *)callout_data;
+        extra->callout_data = &callout_data;
        count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
          options | g_notempty, use_offsets, use_size_offsets);
        extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
@ -1345,24 +1395,36 @@ while (!done)

       /* Failed to match. If this is a /g or /G loop and we previously set
       g_notempty after a null match, this is not necessarily the end.
-      We want to advance the start offset, and continue. Fudge the offset
-      values to achieve this. We won't be at the end of the string - that
-      was checked before setting g_notempty. */
+      We want to advance the start offset, and continue. In the case of UTF-8
+      matching, the advance must be one character, not one byte. Fudge the
+      offset values to achieve this. We won't be at the end of the string -
+      that was checked before setting g_notempty. */

       else
         {
         if (g_notempty != 0)
           {
+          int onechar = 1;
           use_offsets[0] = start_offset;
-          use_offsets[1] = start_offset + 1;
+          if (use_utf8)
+            {
+            while (start_offset + onechar < len)
+              {
+              int tb = bptr[start_offset+onechar];
+              /* Stop at the first byte that is not a 10xxxxxx continuation */
+              if ((tb & 0xc0) != 0x80) break;
+              onechar++;
+              }
+            }
+          use_offsets[1] = start_offset + onechar;
           }
         else
           {
-          if (gmatched == 0)   /* Error if no previous matches */
+          if (count == PCRE_ERROR_NOMATCH)
             {
-            if (count == -1) fprintf(outfile, "No match\n");
-              else fprintf(outfile, "Error %d\n", count);
+            if (gmatched == 0) fprintf(outfile, "No match\n");
             }
+          else fprintf(outfile, "Error %d\n", count);
           break;  /* Out of the /g loop */
           }
         }
@ -1414,7 +1476,7 @@ while (!done)
    }
  }

-fprintf(outfile, "\n");
+if (infile == stdin) fprintf(outfile, "\n");
 return 0;
 }

--- a/ext/pcre/pcrelib/study.c
+++ b/ext/pcre/pcrelib/study.c
@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.

 Written by: Philip Hazel <ph10@cam.ac.uk>

-           Copyright (c) 1997-2002 University of Cambridge
+           Copyright (c) 1997-2003 University of Cambridge

 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@ -260,6 +260,9 @@ do
      case OP_TYPEMINQUERY:
      switch(tcode[1])
        {
+        case OP_ANY:
+        return FALSE;
+
        case OP_NOT_DIGIT:
        for (c = 0; c < 32; c++)
          start_bits[c] |= ~cd->cbits[c+cbit_digit];
@ -297,19 +300,50 @@ do
       /* Character class where all the information is in a bit map: set the
       bits and either carry on or not, according to the repeat count. If it was
       a negative class, and we are operating with UTF-8 characters, any byte
-      with the top-bit set is a potentially valid starter because it may start
-      a character with a value > 255. (This is sub-optimal in that the
-      character may be in the range 128-255, and those characters might be
-      unwanted, but that's as far as we go for the moment.) */
+      with a value >= 0xc4 is a potentially valid starter because it starts a
+      character with a value > 255. */

       case OP_NCLASS:
-      if (utf8) memset(start_bits+16, 0xff, 16);
+      if (utf8)
+        {
+        start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc7 */
+        memset(start_bits+25, 0xff, 7);      /* Bits for 0xc8 - 0xff */
+        }
       /* Fall through */

       case OP_CLASS:
         {
         tcode++;
-        for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
+
+        /* In UTF-8 mode, the bits in a bit map correspond to character
+        values, not to byte values. However, the bit map we are constructing is
+        for byte values. So we have to do a conversion for characters whose
+        value is > 127. In fact, there are only two possible starting bytes for
+        characters in the range 128 - 255. */
+
+        if (utf8)
+          {
+          for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
+          for (c = 128; c < 256; c++)
+            {
+            if ((tcode[c/8] & (1 << (c&7))) != 0)
+              {
+              int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
+              start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
+              c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
+              }
+            }
+          }
+
+        /* In non-UTF-8 mode, the two bit maps are completely compatible. */
+
+        else
+          {
+          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
+          }
+
+        /* Advance past the bit map, and act on what follows */
+
         tcode += 32;
         switch (*tcode)
           {
@ -363,7 +397,7 @@ Returns:    pointer to a pcre_extra block, with study_data filled in and the
            NULL on error or if no optimization possible
 */

-pcre_extra *
+EXPORT pcre_extra *
 pcre_study(const pcre *external_re, int options, const char **errorptr)
 {
 uschar start_bits[32];
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@ -106,6 +106,15 @@ static PHP_MINIT_FUNCTION(pcre)
 	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
+
+	pcre_malloc = php_pcre_malloc;
+	pcre_free = php_pcre_free;
+
+#ifdef NO_RECURSE
+	pcre_stack_malloc = php_pcre_malloc;
+	pcre_stack_free = php_pcre_free;
+#endif
+	
 	return SUCCESS;
 }
 /* }}} */
@ -121,16 +130,6 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
 }
 /* }}} */

-/* {{{ PHP_RINIT_FUNCTION(pcre) */
-static PHP_RINIT_FUNCTION(pcre)
-{
-	pcre_malloc = php_pcre_malloc;
-	pcre_free = php_pcre_free;
-	
-	return SUCCESS;
-}
-/* }}} */
-
 /* {{{ pcre_get_compiled_regex
 */
 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) {
@ -1520,7 +1519,7 @@ zend_module_entry pcre_module_entry = {
 	pcre_functions,
 	PHP_MINIT(pcre),
 	PHP_MSHUTDOWN(pcre),
-	PHP_RINIT(pcre),
+	NULL,
 	NULL,
 	PHP_MINFO(pcre),
 	NO_VERSION_YET,