Support filters with a "=" selector prefix, which matches like the normal selector but without normalizing word sounds

This commit is contained in:
Bofh 2022-12-18 13:15:48 +01:00
parent ce8fb2ef29
commit b365cd46d2
1 changed file with 32 additions and 4 deletions

View File

@ -405,7 +405,7 @@ function normalize_word_sound($word, $cback=null) {
return $word;
}
function normalize_for_search($str) {
function normalize_for_search($str, $wsound=true) {
if (trim($str) === '') return '';
$str = trim(strip_tags(str_replace('>', '> ', $str)));
$str = strtolower(remove_accents($str));
@ -416,6 +416,7 @@ function normalize_for_search($str) {
foreach ($words as $word) {
if (trim($word) === '')
continue;
if ($wsound)
$word = normalize_word_sound($word);
$nword = '';
if (strlen($word) <= 3) {
@ -474,12 +475,20 @@ function matches_comparing_expression($expr, $o_text, $debug=false) {
echo '<pre>'.$o_text.'</pre>';
}
$text = normalize_for_search($o_text);
$i_text = null;
if (strpos($expr['original'], '=') !== false)
$i_text = normalize_for_search($o_text, false);
if ($debug) {
echo '<h2>Normalized</h2>';
echo '<pre>'.$text.'</pre>';
echo '<br><br>';
}
$m_text_words = explode(' ', $text);
$i_text_words = null;
if ($i_text !== null)
$i_text_words = explode(' ', $i_text);
$o_text_words = null;
if (preg_match('/(^|\s)\*\w+/', $expr['original']))
{
@ -490,6 +499,7 @@ function matches_comparing_expression($expr, $o_text, $debug=false) {
$nw_text_words []= $o;
$o_text_words = $nw_text_words;
}
foreach ($expr['parsed'] as $t)
{
$bool = false;
@ -498,7 +508,8 @@ function matches_comparing_expression($expr, $o_text, $debug=false) {
$content = substr($t, strpos($t, ' ')+1);
$content = trim(trim(trim($content), '"'));
$o_bool = ($selector[0] === '*' && $o_text_words !== null);
if ($o_bool)
$i_bool = ($selector[0] === '=' && $i_text_words !== null);
if ($o_bool || $i_bool)
$selector = substr($selector, 1);
switch ($selector)
@ -516,6 +527,13 @@ function matches_comparing_expression($expr, $o_text, $debug=false) {
}
continue;
}
if ($i_bool) {
if (!in_array(normalize_for_search($w, false), $i_text_words)) {
$hasall = false;
break;
}
continue;
}
if (!in_array(normalize_for_search($w), $m_text_words)) {
$hasall = false;
break;
@ -547,6 +565,13 @@ function matches_comparing_expression($expr, $o_text, $debug=false) {
}
continue;
}
if ($i_bool) {
if (in_array(normalize_for_search($w, false), $i_text_words)) {
$hasany = true;
break;
}
continue;
}
if (in_array(normalize_for_search($w), $m_text_words)) {
$hasany = true;
break;
@ -572,6 +597,9 @@ function matches_comparing_expression($expr, $o_text, $debug=false) {
if ($o_bool) {
$a = str_replace(' ', '', $o_text);
$b = str_replace(' ', '', $content);
} else if ($i_bool) {
$a = str_replace(' ', '', $i_text);
$b = str_replace(' ', '', normalize_for_search($content, false));
} else {
$a = str_replace(' ', '', $text);
$b = str_replace(' ', '', normalize_for_search($content));
@ -617,7 +645,7 @@ function matches_comparing_expression($expr, $o_text, $debug=false) {
});
preg_match_all('/\w{4,}/', $content, $mts);
foreach ((isset($mts[0]) ? $mts[0] : []) as $m)
$content = preg_replace('/\b'.$m.'\b/', normalize_for_search($m), $content);
$content = preg_replace('/\b'.$m.'\b/', normalize_for_search($m, !$i_bool), $content);
}
if ($content[0] != '^')
$content = '^.*'.$content;