From 2a1c28e0417716c996e31c43152a54eee2fa2ad5 Mon Sep 17 00:00:00 2001 From: Bofh Date: Sun, 18 Dec 2022 01:05:24 +0100 Subject: [PATCH] Allow searching any filter using "literal" (exact) matches, without word modifications --- base.php | 103 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 30 deletions(-) diff --git a/base.php b/base.php index 8db8721..727804d 100644 --- a/base.php +++ b/base.php @@ -465,21 +465,31 @@ function parse_comparing_expression($expr) { ]; } -function matches_comparing_expression($expr, $text, $debug=false) { +function matches_comparing_expression($expr, $o_text, $debug=false) { if (gettype($expr) === 'string') $expr = parse_comparing_expression($expr); $result = $expr['original']; if ($debug) { echo '

Original

'; - echo '
'.$text.'
'; + echo '
'.$o_text.'
'; } - $text = normalize_for_search($text); + $text = normalize_for_search($o_text); if ($debug) { echo '

Normalized

'; echo '
'.$text.'
'; echo '

'; } - $text_words = explode(' ', $text); + $m_text_words = explode(' ', $text); + $o_text_words = null; + if (preg_match('/(^|\s)\*\w+/', $expr['original'])) + { + $nw_text_words = []; + $o_text_words = explode(' ', $o_text); + foreach ($o_text_words as $o) + if ($o !== '') + $nw_text_words []= $o; + $o_text_words = $nw_text_words; + } foreach ($expr['parsed'] as $t) { $bool = false; @@ -487,14 +497,26 @@ function matches_comparing_expression($expr, $text, $debug=false) { $selector = substr($t, 0, strpos($t, ' ')); $content = substr($t, strpos($t, ' ')+1); $content = trim(trim(trim($content), '"')); + $o_bool = ($selector[0] === '*' && $o_text_words !== null); + if ($o_bool) + $selector = substr($selector, 1); + switch ($selector) { case 'words': case 'hasall': $ws = explode(' ', $content); $hasall = true; - foreach ($ws as $w) { - if (!in_array(normalize_for_search($w), $text_words)) { + foreach ($ws as $w) + { + if ($o_bool) { + if (!in_array($w, $o_text_words)) { + $hasall = false; + break; + } + continue; + } + if (!in_array(normalize_for_search($w), $m_text_words)) { $hasall = false; break; } @@ -516,8 +538,16 @@ function matches_comparing_expression($expr, $text, $debug=false) { case 'hasany': $ws = explode(' ', $content); $hasany = false; - foreach ($ws as $w) { - if (in_array(normalize_for_search($w), $text_words)) { + foreach ($ws as $w) + { + if ($o_bool) { + if (in_array($w, $o_text_words)) { + $hasany = true; + break; + } + continue; + } + if (in_array(normalize_for_search($w), $m_text_words)) { $hasany = true; break; } @@ -538,8 +568,14 @@ function matches_comparing_expression($expr, $text, $debug=false) { case 'has': case 'includes': case 'contains': - $a = str_replace(' ', '', $text); - $b = str_replace(' ', '', normalize_for_search($content)); + $a = null; $b = null; + if ($o_bool) { + $a = str_replace(' ', '', $o_text); + $b = str_replace(' ', '', $content); + } else { + $a = str_replace(' ', '', $text); + $b = str_replace(' ', '', normalize_for_search($content)); + } if ($debug) { echo '

'; echo '

contains

'; @@ -558,32 +594,36 @@ function matches_comparing_expression($expr, $text, $debug=false) { case 'regex': case 'matches': - if (strlen($content) <= 0) + if (strlen($content) === 0) continue 2; - $content = normalize_word_sound($content, function($w, $h) { - if ($h[0] === 'b') - return preg_replace('/([^\\\])b/', '\1'.$h[1], $w); - if (preg_match('/^\d$/', $h[0])) { - $b = false; - for ($i = 0; $i < strlen($w); $i++) { - if (in_array($w[$i], ['{','}','[',']'])) { - $b = !$b; - continue; + if (!$o_bool) + { + $content = normalize_word_sound($content, function($w, $h) { + if ($h[0] === 'b') + return preg_replace('/([^\\\])b/', '\1'.$h[1], $w); + if (preg_match('/^\d$/', $h[0])) { + $b = false; + for ($i = 0; $i < strlen($w); $i++) { + if (in_array($w[$i], ['{','}','[',']'])) { + $b = !$b; + continue; + } + if (!$b && $w[$i] === $h[0]) + $w = set_chat_at($w, $i, $h[1]); } - if (!$b && $w[$i] === $h[0]) - $w = set_chat_at($w, $i, $h[1]); + return $w; } - return $w; - } - return str_replace($h[0], $h[1], $w); - }); - preg_match_all('/\w{4,}/', $content, $mts); - foreach ((isset($mts[0]) ? $mts[0] : []) as $m) - $content = preg_replace('/\b'.$m.'\b/', normalize_for_search($m), $content); + return str_replace($h[0], $h[1], $w); + }); + preg_match_all('/\w{4,}/', $content, $mts); + foreach ((isset($mts[0]) ? $mts[0] : []) as $m) + $content = preg_replace('/\b'.$m.'\b/', normalize_for_search($m), $content); + } if ($content[0] != '^') $content = '^.*'.$content; if ($content[strlen($content)-1] != '$') $content .= '.*$'; + $content = str_replace('#', '\#', $content); if ($debug) { echo '

'; echo '

regex

'; @@ -591,7 +631,10 @@ function matches_comparing_expression($expr, $text, $debug=false) { echo $content; echo ''; } - $bool = preg_match('#'.$content.'#', $text) === 1; + $bool = false; + if ($o_bool) + $bool = preg_match('#'.$content.'#i', $o_text) === 1; + else $bool = preg_match('#'.$content.'#', $text) === 1; if ($debug) { echo '
Result: '; var_dump($bool);