Allow searching any filter using "literal" (exact) matches, without word modifications

This commit is contained in:
Bofh 2022-12-18 01:05:24 +01:00
parent 727fdd5ac6
commit 2a1c28e041
1 changed file with 73 additions and 30 deletions

103
base.php
View File

@ -465,21 +465,31 @@ function parse_comparing_expression($expr) {
];
}
function matches_comparing_expression($expr, $text, $debug=false) {
function matches_comparing_expression($expr, $o_text, $debug=false) {
if (gettype($expr) === 'string')
$expr = parse_comparing_expression($expr);
$result = $expr['original'];
if ($debug) {
echo '<h2>Original</h2>';
echo '<pre>'.$text.'</pre>';
echo '<pre>'.$o_text.'</pre>';
}
$text = normalize_for_search($text);
$text = normalize_for_search($o_text);
if ($debug) {
echo '<h2>Normalized</h2>';
echo '<pre>'.$text.'</pre>';
echo '<br><br>';
}
$text_words = explode(' ', $text);
$m_text_words = explode(' ', $text);
$o_text_words = null;
if (preg_match('/(^|\s)\*\w+/', $expr['original']))
{
$nw_text_words = [];
$o_text_words = explode(' ', $o_text);
foreach ($o_text_words as $o)
if ($o !== '')
$nw_text_words []= $o;
$o_text_words = $nw_text_words;
}
foreach ($expr['parsed'] as $t)
{
$bool = false;
@ -487,14 +497,26 @@ function matches_comparing_expression($expr, $text, $debug=false) {
$selector = substr($t, 0, strpos($t, ' '));
$content = substr($t, strpos($t, ' ')+1);
$content = trim(trim(trim($content), '"'));
$o_bool = ($selector[0] === '*' && $o_text_words !== null);
if ($o_bool)
$selector = substr($selector, 1);
switch ($selector)
{
case 'words':
case 'hasall':
$ws = explode(' ', $content);
$hasall = true;
foreach ($ws as $w) {
if (!in_array(normalize_for_search($w), $text_words)) {
foreach ($ws as $w)
{
if ($o_bool) {
if (!in_array($w, $o_text_words)) {
$hasall = false;
break;
}
continue;
}
if (!in_array(normalize_for_search($w), $m_text_words)) {
$hasall = false;
break;
}
@ -516,8 +538,16 @@ function matches_comparing_expression($expr, $text, $debug=false) {
case 'hasany':
$ws = explode(' ', $content);
$hasany = false;
foreach ($ws as $w) {
if (in_array(normalize_for_search($w), $text_words)) {
foreach ($ws as $w)
{
if ($o_bool) {
if (in_array($w, $o_text_words)) {
$hasany = true;
break;
}
continue;
}
if (in_array(normalize_for_search($w), $m_text_words)) {
$hasany = true;
break;
}
@ -538,8 +568,14 @@ function matches_comparing_expression($expr, $text, $debug=false) {
case 'has':
case 'includes':
case 'contains':
$a = str_replace(' ', '', $text);
$b = str_replace(' ', '', normalize_for_search($content));
$a = null; $b = null;
if ($o_bool) {
$a = str_replace(' ', '', $o_text);
$b = str_replace(' ', '', $content);
} else {
$a = str_replace(' ', '', $text);
$b = str_replace(' ', '', normalize_for_search($content));
}
if ($debug) {
echo '<br><br>';
echo '<h3>contains</h3>';
@ -558,32 +594,36 @@ function matches_comparing_expression($expr, $text, $debug=false) {
case 'regex':
case 'matches':
if (strlen($content) <= 0)
if (strlen($content) === 0)
continue 2;
$content = normalize_word_sound($content, function($w, $h) {
if ($h[0] === 'b')
return preg_replace('/([^\\\])b/', '\1'.$h[1], $w);
if (preg_match('/^\d$/', $h[0])) {
$b = false;
for ($i = 0; $i < strlen($w); $i++) {
if (in_array($w[$i], ['{','}','[',']'])) {
$b = !$b;
continue;
if (!$o_bool)
{
$content = normalize_word_sound($content, function($w, $h) {
if ($h[0] === 'b')
return preg_replace('/([^\\\])b/', '\1'.$h[1], $w);
if (preg_match('/^\d$/', $h[0])) {
$b = false;
for ($i = 0; $i < strlen($w); $i++) {
if (in_array($w[$i], ['{','}','[',']'])) {
$b = !$b;
continue;
}
if (!$b && $w[$i] === $h[0])
$w = set_chat_at($w, $i, $h[1]);
}
if (!$b && $w[$i] === $h[0])
$w = set_chat_at($w, $i, $h[1]);
return $w;
}
return $w;
}
return str_replace($h[0], $h[1], $w);
});
preg_match_all('/\w{4,}/', $content, $mts);
foreach ((isset($mts[0]) ? $mts[0] : []) as $m)
$content = preg_replace('/\b'.$m.'\b/', normalize_for_search($m), $content);
return str_replace($h[0], $h[1], $w);
});
preg_match_all('/\w{4,}/', $content, $mts);
foreach ((isset($mts[0]) ? $mts[0] : []) as $m)
$content = preg_replace('/\b'.$m.'\b/', normalize_for_search($m), $content);
}
if ($content[0] != '^')
$content = '^.*'.$content;
if ($content[strlen($content)-1] != '$')
$content .= '.*$';
$content = str_replace('#', '\#', $content);
if ($debug) {
echo '<br><br>';
echo '<h3>regex</h3>';
@ -591,7 +631,10 @@ function matches_comparing_expression($expr, $text, $debug=false) {
echo $content;
echo '</pre>';
}
$bool = preg_match('#'.$content.'#', $text) === 1;
$bool = false;
if ($o_bool)
$bool = preg_match('#'.$content.'#i', $o_text) === 1;
else $bool = preg_match('#'.$content.'#', $text) === 1;
if ($debug) {
echo '<br>Result: ';
var_dump($bool);