I have a PHP script that calls PREG_MATCH_ALL in a loop. The purpose is to test a series of regexs. The script is supposed to call preg_match_all 112 iterations in a loop but with > 9500 bytes search string it dies after about 100 iterations. The larger the input string the sooner preg_match_all dies and kills the script. I've tried changing the regex a little and the input text but the problem persists. I do not get any error messages.
PHP Version 5.2.14
PCRE Lib 8.02
Here is a dumbed down script with the problem:
boLib.php
bosettings.php
PHP Version 5.2.14
PCRE Lib 8.02
Here is a dumbed down script with the problem:
Code:
<?PHP
include_once "boLib.php";
include_once "bosettings.php";
$checkstr = $_POST['checkstr'];
$currentDateTime = date("Y-m-d-H-i-s-"). substr((string)microtime(), 2, 8);
DebugToLog(' ');
DebugToLog(' ');
DebugToLog(' ');
DebugToLog(' ');
DebugToLog(' ');
DebugToLog('=================================== DEBUG: BEGIN TEST OF INPUT STRING ON ' . $currentDateTime . ' ============================');
DebugToLog('Date and time is ' . $currentDateTime);
// DebugToLog('DEBUG: $_POST checkstr=' . $checkstr);
$checkstrSize = strlen($checkstr);
DebugToLog('DEBUG: Input string size=' . $checkstrSize);
error_reporting(E_ALL);
ini_set('display_errors', true);
$real_usage = TRUE;
DebugToLog('DEBUG: BEGIN MEMORY=' . memory_get_usage());
DebugToLog('DEBUG: MEMORY PEAK=' . memory_get_peak_usage());
$encode = mb_detect_encoding($checkstr, "auto");
DebugToLog('DEBUG: Encoding from mb_detect_encoding=' . $encode);
$checkstr = stripslashes($checkstr);
DebugToLog(' ');
DebugToLog(' ');
DebugToLog($checkstr );
DebugToLog('DEBUG: Begining memory=' . memory_get_usage());
DebugToLog('DEBUG: Memory peak=' . memory_get_peak_usage());
$TempArray = array();
$matches = array();
$iOffset = 0;
$i2 = 0;
$array_elements = 112;
$abigregex = '/([l]{1}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[u\xB5]{1,15}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[c\xA2\xA9\x80]{1,15}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[k]{1,15}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[i1!\xA1\xEC-\xEF]{1,15}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[n]{1,15}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[-_^.,`~\'\s]{0,15}[!@#$%&*\/\+=\(\)\\]{0,2}[g]) | ((?=([^\*]*?\*[^\*\s]*?(\*[^\*]*?)?))f[-_^.,`~\'\s]{0,15}[u\*][-_^.,`~\'\s]{0,15}c[-_^.,`~\'\s]{0,15}k[-_^.,`~\'\s]{0,15}[i\*][-_^.,`~\'\s]{0,15}n[-_^.,`~\'\s]{0,15}g)/iux';
$REGEXSize = strlen($abigregex);
DebugToLog('INFO: Regex to be used is: ' . $abigregex . ' =========================================================');
DebugToLog('DEBUG: REGEXSize string size=' . $REGEXSize);
DebugToLog(' ');
DebugToLog(' ');
for ($i=0; $i<$array_elements; $i++)
{
DebugToLog('-----------------------------------------------------------------------');
DebugToLog('INFO: Call REGEX Iteration ' . $i);
DebugToLog('DEBUG: MEMORY IS=' . memory_get_usage());
DebugToLog('DEBUG: PEAK MEMORY IS=' . memory_get_peak_usage());
$TempArray = NULL;
$regrtncode = preg_match_all( $abigregex, $checkstr, $TempArray, PREG_OFFSET_CAPTURE, $iOffset);
$REGerror = preg_last_error();
pcre_error_decode($REGerror);
if ($regrtncode > 0)
{ echo '<BR>WE FOUND SOMETHING! GT zero'; }
ELSE { echo '<BR>NO MATCH! eq zero'; }
}
?>
boLib.php
Code:
<?PHP
function pcre_error_decode($REGerror ) {
echo '<br> Now Test return code=' . $REGerror ;
switch ($REGerror) {
case PREG_PATTERN_ORDER:
print "Orders results so that $matches[0] is an array of full pattern matches, $matches[1] is an array of strings matched by the first parenthesized subpattern, and so on. This flag is only used with preg_match_all().\n";
break;
case PREG_SET_ORDER:
print "Orders results so that $matches[0] is an array of first set of matches, $matches[1] is an array of second set of matches, and so on. This flag is only used with preg_match_all().\n";
break;
case PREG_OFFSET_CAPTURE:
print "See the description of PREG_SPLIT_OFFSET_CAPTURE. This flag is available since PHP 4.3.0.\n";
break;
case PREG_SPLIT_NO_EMPTY:
print "This flag tells preg_split() to return only non-empty pieces.\n";
break;
case PREG_SPLIT_DELIM_CAPTURE:
print "This flag tells preg_split() to capture parenthesized expression in the delimiter pattern as well. This flag is available since PHP 4.0.5.\n";
break;
case PREG_SPLIT_OFFSET_CAPTURE:
print "If this flag is set, for every occurring match the appendant string offset will also be returned. Note that this changes the return values in an array where every element is an array consisting of the matched string at offset 0 and its string offset within subject at offset 1. This flag is available since PHP 4.3.0 and is only used for preg_split().\n";
break;
case PREG_NO_ERROR:
// do not print in this case
//print "Returned by preg_last_error() if there were no errors. Available since PHP 5.2.0.\n";
break;
case PREG_INTERNAL_ERROR:
print "Returned by preg_last_error() if there was an internal PCRE error. Available since PHP 5.2.0.\n";
break;
case PREG_BACKTRACK_LIMIT_ERROR:
print "Returned by preg_last_error() if backtrack limit was exhausted. Available since PHP 5.2.0.\n";
break;
case PREG_RECURSION_LIMIT_ERROR:
print "Returned by preg_last_error() if recursion limit was exhausted. Available since PHP 5.2.0.\n";
break;
case PREG_BAD_UTF8_ERROR:
print "Returned by preg_last_error() if the last error was caused by malformed UTF-8 data (only when running a regex in UTF-8 mode). Available since PHP 5.2.0.\n";
break;
case PREG_BAD_UTF8_OFFSET_ERROR:
print "Returned by preg_last_error() if the offset didn't correspond to the begin of a valid UTF-8 code point (only when running a regex in UTF-8 mode). Available since PHP 5.3.0.\n";
break;
case PCRE_VERSION:
print "PCRE version and release date (e.g. '7.0 18-Dec-2006'). Available since PHP 5.2.4.\n";
break;
default :
DebugToLog('<br> the condition does not equal any prev value');
break;
}
}
function DebugToLog($msg){
include "bosettings.php";
if ($LogToScreen) { echo '<br>' . $msg; return; }
}
?>
bosettings.php
Code:
<?PHP
//
$MaxInputStringSize = 10000000;
$MaxInputToChk = 10000000;
$DataDir = "";
$LogToScreen = TRUE;
$errorchecking = FALSE;
$MinCheckstrSize = 2;
?>