#native_company# #native_desc#
#native_cta#

Search Criteria Parser

By Lee Aholima
on November 1, 2002

Version: 1.0

Type: Full Script

Category: Algorithms

License: GNU General Public License

Description: Code that splits a search criteria string containing the boolean operators (and, or, not) and opening and closing brackets, ( and ), into tokens and parses them for correctness. You can then read the tokens one by one, for example in a switch statement, and for each non-reserved token (a keyword, i.e. anything other than “and”, “or”, “not”, “(” or “)”)
add code such as MySQL’s LOCATE(<token>,<text column in a mysql table>)>0 to check that the keyword appears in the <text column in a mysql table>.

<?php
/*
**	Topic: 			search criteria parser
**	Original Author:	Lee Aholima, Day Five Consulting Ltd
**	History:
**		20021030 - Lee Aholima, Day Five Consulting Ltd
**			Created
**		2002**** - editor, co.
**			comments
**
** Description:
** 	Parses search criteria according to the following rules
**	E -> E and T
**	E -> E or T
**	E -> T
**	T -> not F
**	T -> F
**	F -> Id
**	F -> (E)
**
**	processes search criteria as lowercase.
*/
/*
** Grammar tables used by checkwithrules(): whenever the end of the parse
** stack equals $rhs[k] it is replaced by $lhs[k]. The rules are tried from
** index 0 upwards, so the order of the entries matters.
*/
$rhs = array (
	"E and T",	/* E -> E and T */
	"E or T",	/* E -> E or T  */
	"T",		/* E -> T       */
	"not F",	/* T -> not F   */
	"F",		/* T -> F       */
	"Id",		/* F -> Id      */
	"( E )"		/* F -> (E)     */
);
$lhs = array (
	"E",
	"E",
	"E",
	"T",
	"T",
	"F",
	"F"
);

/*
** Splits a search criteria string into tokens.
**
** The string is lowercased first. Each of "(", ")", "and", "or" and "not"
** becomes a token of its own; every run of other words between two such
** reserved tokens is joined with single spaces into one keyword token
** (e.g. "Upper   Hutt" gives the single token "upper hutt").
**
** $str     the raw search criteria
** returns  array of token strings indexed from 0; an empty array when the
**          criteria is empty or contains only whitespace
*/
function gettokens($str) {
	/* tokens which always stand on their own and end the current keyword */
	$reserved = array("(", ")", "and", "or", "not");

	/* put the string into lowercase */
	$str = strtolower((string)$str);

	/* make sure ( or ) get picked up as separate tokens */
	$str = str_replace(array("(", ")"), array(" ( ", " ) "), $str);

	/* treat the characters trim() strips (tab, newline, carriage return,
	   NUL, vertical tab) as word separators too, not only the plain space */
	$str = strtr($str, "\t\n\r\0\x0B", "     ");

	/* always start from real arrays so blank input yields array(), not null */
	$tokens = array();
	$words = array();	/* words of the keyword currently being put together */

	foreach (explode(" ", $str) as $piece) {
		/* consecutive separators produce empty pieces - discard them */
		if ($piece === "") {
			continue;
		}

		if (in_array($piece, $reserved, true)) {
			/* a reserved token ends any keyword collected so far */
			if (count($words) > 0) {
				$tokens[] = implode(" ", $words);
				$words = array();
			}
			$tokens[] = $piece;
		}
		else {
			/* part of a (possibly multi word) keyword e.g. upper hutt */
			$words[] = $piece;
		}
	}

	/* a keyword may still be pending at the end of the string */
	if (count($words) > 0) {
		$tokens[] = implode(" ", $words);
	}

	return $tokens;
}

/*
** Checks a token list against the grammar held in the global $rhs / $lhs
** tables with a simple shift-reduce parse.
**
** Every token is pushed onto a space separated string stack: the reserved
** tokens "and", "or", "not", "(" and ")" as themselves, anything else as
** "Id". After each push the rules are tried from index 0; whenever the end
** of the stack equals $rhs[j] it is replaced by $lhs[j] and the scan starts
** again from the first rule.
**
** Side effect: echoes an HTML trace of every rule comparison and of the
** final stack.
**
** $tokens  array of token strings (as produced by gettokens); anything that
**          is not an array is treated as an empty token list
** returns  true when the whole token list reduces to the single symbol "E",
**          false otherwise (including an empty token list)
*/
function checkwithrules ($tokens) {
	global $rhs;
	global $lhs;

	/* a caller may pass null when there are no tokens at all - counting or
	   iterating that would fail, so treat it as an empty list */
	if (!is_array($tokens)) {
		$tokens = array();
	}

	$reserved = array("and", "or", "not", "(", ")");
	$rulecount = sizeof($rhs);

	$stack = "";
	/* foreach so that the array keys need not run from 0 upwards */
	foreach ($tokens as $token) {
		/* shift: reserved tokens go on the stack as is, keywords as Id */
		$symbol = in_array($token, $reserved, true) ? $token : "Id";
		if ($stack == "") {
			$stack = $symbol;
		}
		else {
			$stack = $stack." ".$symbol;
		}

		/* reduce: go through the rules until none of them applies */
		$j = 0;
		while ($j < $rulecount) {
			$len = strlen($rhs[$j]);
			$lenstack = strlen($stack);
			if ($lenstack >= $len) {
				$str = substr($stack, $lenstack - $len, $len);
				echo "<br>stack=".$stack.",str=".$str.",rhs[j]=".$rhs[$j];
				if ($str === $rhs[$j]) {
					/* swap the matched tail for the rule's left hand side
					   and start again from the first rule */
					$stack = substr($stack, 0, $lenstack - $len).$lhs[$j];
					$j = 0;
					continue;
				}
			}
			$j++;
		}
	}

	echo "<br>Stack = '".$stack."'";

	return ($stack == "E");
}

/* End of essential code */
/* Cut and paste from the top of this file to here */
/*-----------------------------------------------------------------*/

/*
** Demo page: shows a form for entering a search criteria and, once it has
** been submitted, lists the tokens and reports whether they comply with
** the rules.
*/
echo "<html>";

/* register_globals can no longer be relied on to create $searchcriteria,
   so fetch the submitted form value explicitly */
if (isset($_POST['searchcriteria']) && is_string($_POST['searchcriteria'])) {
	$searchcriteria = $_POST['searchcriteria'];
}

if (isset($searchcriteria)) {
	/* the criteria comes straight from the user - escape it before it goes
	   into the page */
	echo "<br>Search criteria is '".htmlspecialchars($searchcriteria, ENT_QUOTES)."'<br>";
	echo "<br>The tokens are...<br>";
	$tokens = gettokens($searchcriteria);
	/* make sure there is an array to loop over even for a blank criteria */
	if (!is_array($tokens)) {
		$tokens = array();
	}
	foreach ($tokens as $token) {
		echo "<br>".htmlspecialchars($token, ENT_QUOTES);
	}
	echo "<br><br>Now to check with the rules that it is accepted ";
	echo "<br>(should be a whole lot of output showing the analysis)...";
	if (checkwithrules($tokens)) {
		echo "<br>SUCCESS: complies with the rules";
	}
	else {
		echo "<br>ERROR: does not comply with the rules";
	}
}
echo "<form name=searchform action=scp.php method=post>";
echo "<p>Please enter a search criteria an eg would be";
echo "<br>chef and (2nd or Second) and not (wellington or south island)</p>";
echo "<p><input type=text name=searchcriteria size=50 maxlength=255></p>";
echo "<p><input type=submit>&nbsp;&nbsp;<input type=reset></p>";
echo "</form>";
echo "</html>";
?>