Implemented more complex expressions such as "regex" + improvements on matches

This commit is contained in:
Bofh 2022-12-15 03:23:55 +01:00
parent e99c1bfc89
commit 3e1f3d01e0
2 changed files with 113 additions and 80 deletions

117
base.php
View File

@ -380,13 +380,12 @@ if (!function_exists('str_starts_with')) {
function normalize_for_search($str) { function normalize_for_search($str) {
if (trim($str) === '') return ''; if (trim($str) === '') return '';
$str = preg_replace('/\s\s+/', ' ', trim($str)); $str = strtolower(remove_accents(trim($str)));
$words = explode(' ', $str); $str = preg_replace('/[^a-z0-9]+/', ' ', $str);
$str = preg_replace('/\s+/', ' ', $str);
$words = explode(' ', trim($str));
$newwords = []; $newwords = [];
foreach ($words as $word) { foreach ($words as $word) {
$word = remove_accents($word);
$word = strtolower($word);
$word = preg_replace('/[^a-z0-9]+/', '', $word);
if (trim($word) === '') if (trim($word) === '')
continue; continue;
$word = str_replace('4', 'a', $word); $word = str_replace('4', 'a', $word);
@ -411,35 +410,15 @@ function normalize_for_search($str) {
} }
function parse_comparing_expression($expr) { function parse_comparing_expression($expr) {
$lvl = 0; $expr = preg_replace('/\n/', ' ', $expr);
$str_levels = []; $expr = preg_replace('/\s+/', ' ', $expr);
for ($i = 0; $i < strlen($expr); $i++) { $p_expr = preg_replace('/\(|\)/', ' ', $expr);
$s = $expr[$i]; $p_expr = preg_replace('/\s+/', ' ', $p_expr);
if ($s === '(') { $p_expr = preg_split('/OR|AND(\sNOT)?/', $p_expr);
$lvl++; continue; foreach ($p_expr as &$e) $e = trim($e);
} else if ($s === ')') {
$lvl--; continue;
}
if (!isset($str_levels[$lvl]))
$str_levels[$lvl] = '';
$str_levels[$lvl] .= $s;
}
foreach ($str_levels as $key => $val) {
$val = trim($val);
if (strpos($val, 'OR') === 0)
$val = trim(substr($val, 2));
else if (strpos($val, 'AND') === 0)
$val = trim(substr($val, 3));
$val = str_replace('OR', '[[<SEP>]]', $val);
$val = str_replace('AND', '[[<SEP>]]', $val);
$val = explode('[[<SEP>]]', $val);
foreach ($val as &$v)
$v = trim($v);
$str_levels[$key] = $val;
}
return [ return [
'original' => $expr, 'original' => $expr,
'parsed' => $str_levels 'parsed' => $p_expr,
]; ];
} }
@ -448,30 +427,66 @@ function matches_comparing_expression($expr, $text) {
$expr = parse_comparing_expression($expr); $expr = parse_comparing_expression($expr);
$result = $expr['original']; $result = $expr['original'];
$text_words = explode(' ', $text); $text_words = explode(' ', $text);
foreach ($expr['parsed'] as $lvl => $exl) { foreach ($expr['parsed'] as $t)
foreach ($exl as &$t) { {
if (strpos($t, 'words ') === 0) { $bool = false;
$w = substr($t, strlen('words')+1); $t = trim($t);
$ws = explode(' ', trim(trim($w, '"'))); $selector = substr($t, 0, strpos($t, ' '));
$haveall = true; $content = substr($t, strpos($t, ' ')+1);
$content = trim(trim(trim($content), '"'));
switch ($selector)
{
case 'words':
case 'hasall':
$ws = explode(' ', $content);
$hasall = true;
foreach ($ws as $w) { foreach ($ws as $w) {
if (!in_array(normalize_for_search($w), $text_words)) { if (!in_array(normalize_for_search($w), $text_words)) {
$haveall = false; $hasall = false;
break; break;
} }
} }
$result = str_replace($t, $haveall ? 'true' : 'false', $result); $bool = $hasall;
} else if (strpos($t, 'contains ') === 0) { break;
$w = substr($t, strlen('contains')+1);
$w = trim(trim($w, '"')); case 'anyword':
$contains = strpos(' '.$text.' ', case 'hasany':
' '.normalize_for_search($w).' ') !== false; $ws = explode(' ', $content);
$result = str_replace($t, $contains ? 'true' : 'false', $result); $hasany = false;
foreach ($ws as $w) {
if (in_array(normalize_for_search($w), $text_words)) {
$hasany = true;
break;
} }
} }
$bool = $hasany;
break;
case 'has':
case 'includes':
case 'contains':
$bool = strpos(' '.$text.' ',
' '.normalize_for_search($content).' ') !== false;
break;
case 'regex':
case 'matches':
if (strlen($content) <= 0)
continue 2;
if ($content[0] != '^')
$content = '^.*'.$content;
if ($content[strlen($content)-1] != '$')
$content .= '.*$';
$bool = preg_match('#'.$content.'#', $text) === 1;
break;
} }
$result = str_replace('OR', '|', $result); $result = str_replace($t, $bool ? 'true' : 'false', $result);
$result = str_replace('AND', '&', $result); }
$result = str_replace('OR', '||', $result);
$result = str_replace('AND', '&&', $result);
$result = str_replace('NOT ', '!', $result);
$result = str_replace('NOT', '!', $result);
try { try {
eval('$result = '.$result.';'); eval('$result = '.$result.';');
} catch (\Throwable $e) { } catch (\Throwable $e) {
@ -559,14 +574,12 @@ function cronjob_db_create($software, $instance, $sql, $time=3600) {
if (!file_exists($dir_crons_db)) if (!file_exists($dir_crons_db))
mkdir($dir_crons_db); mkdir($dir_crons_db);
$cron_file = $software.','.$instance.','.$job_key; $cron_file = $software.','.$instance.','.$job_key;
$result_file = $dir_crons_db.'/'.$cron_file.','.$time;
$touch_1970 = !file_exists($result_file);
foreach (scandir($dir_crons_db) as $fl) { foreach (scandir($dir_crons_db) as $fl) {
if (strpos($fl, $cron_file) !== false) if (strpos($fl, $cron_file) !== false)
unlink($dir_crons_db.'/'.$fl); unlink($dir_crons_db.'/'.$fl);
} }
$touch_1970 = false;
$result_file = $dir_crons_db.'/'.$cron_file.','.$time;
if (!file_exists($result_file))
$touch_1970 = true;
file_put_contents($result_file, $sql); file_put_contents($result_file, $sql);
if ($touch_1970) touch($result_file, 1000); if ($touch_1970) touch($result_file, 1000);
return $cron_file; return $cron_file;

View File

@ -37,39 +37,59 @@ window.view.instance = {
}, },
html_add_search_spans: function(html, search) { html_add_search_spans: function(html, search) {
search = search.trim(); search = search.trim();
if (search.startsWith('expr:'))
{
const hwords = html.split(/\b/); const hwords = html.split(/\b/);
if (search.startsWith('expr:')) {
search = search.substr(5).trim(); search = search.substr(5).trim();
var sxs = search.replaceAll('(', '').replaceAll(')','').split(/OR|AND/); var sxs = search
for (var i = 0; i < sxs.length; i++) { .replaceAll('\n', ' ')
var sx = sxs[i].trim(); .replaceAll('(', '')
.replaceAll(')','')
.replaceAll(/\s+/g, ' ')
.replaceAll('NOT', '!')
.split(/OR|AND/);
for (var i = 0; i < sxs.length; i++)
{
const sx = sxs[i].trim();
if (sx.startsWith('!')) if (sx.startsWith('!'))
continue; continue;
sx = sx.substr(sx.indexOf('"')+1); const selector = sx.substr(0, sx.indexOf(' ')).trim();
sx = sx.substr(0, sx.indexOf('"')); var content = sx.substr(sx.indexOf(' ')+1).trim();
sx = sx.trim().split(' '); content = normalize_for_search(content);
for (var j = 0; j < sx.length; j++) { const text_words = content.split(' ');
const w = sx[j]; switch (selector) {
case 'words':
case 'hasall':
case 'anyword':
case 'hasany':
for (var j = 0; j < text_words.length; j++) {
const w = text_words[j];
for (var k = 0; k < hwords.length; k++) { for (var k = 0; k < hwords.length; k++) {
const h = hwords[k]; const h = hwords[k];
if (normalize_for_search(h).includes(normalize_for_search(w))) if (normalize_for_search(h) === w)
html = html.replaceAll(h, '<span class="sr">'+h+'</span>'); html = html.replace(new RegExp("\\b"+h+"\\b", 'g'),
'<span class="sr">'+h+'</span>');
} }
} }
} break;
} else { case 'has':
search = search.toLowerCase(); case 'includes':
const words = search.split(' '); case 'contains':
for (var i = 0; i < words.length; i++) { // TODO: implement contains "sr"
const w = words[i]; break;
for (var j = 0; j < hwords.length; j++) { case 'regex':
const h = hwords[j]; // TODO: implement regex "sr"
if (normalize_for_search(h).includes(normalize_for_search(w))) break;
html = html.replaceAll(h, '<span class="sr">'+h+'</span>');
}
} }
} }
return html; return html;
}
if (search === '<empty>')
return html;
search = search.replaceAll('"', '');
return window.view.instance.html_add_search_spans(html, `expr: words "${search}"`);
}, },
do: { do: {
users: { users: {