Implemented more complex expressions such as "regex" + improvements on matches

This commit is contained in:
Bofh 2022-12-15 03:23:55 +01:00
parent e99c1bfc89
commit 3e1f3d01e0
2 changed files with 113 additions and 80 deletions

117
base.php
View File

@ -380,13 +380,12 @@ if (!function_exists('str_starts_with')) {
function normalize_for_search($str) {
if (trim($str) === '') return '';
$str = preg_replace('/\s\s+/', ' ', trim($str));
$words = explode(' ', $str);
$str = strtolower(remove_accents(trim($str)));
$str = preg_replace('/[^a-z0-9]+/', ' ', $str);
$str = preg_replace('/\s+/', ' ', $str);
$words = explode(' ', trim($str));
$newwords = [];
foreach ($words as $word) {
$word = remove_accents($word);
$word = strtolower($word);
$word = preg_replace('/[^a-z0-9]+/', '', $word);
if (trim($word) === '')
continue;
$word = str_replace('4', 'a', $word);
@ -411,35 +410,15 @@ function normalize_for_search($str) {
}
// Splits a comparing expression into its individual terms and returns them
// together with the expression text.
//
// NOTE(review): this span looks like a diff hunk whose +/- markers were lost.
// It holds two successive bodies (a per-parenthesis-depth scanner, then a
// regex-based splitter), and the returned array names 'parsed' twice with no
// comma after the first entry, so it is not valid PHP as written. Which lines
// are the removed ones and which are the added ones should be confirmed
// against the repository before any of this is changed.
function parse_comparing_expression($expr) {
// First body: walk the expression one character at a time, tracking the
// parenthesis nesting depth; parentheses themselves are dropped and every
// other character is appended to the string collected for the current depth.
$lvl = 0;
$str_levels = [];
for ($i = 0; $i < strlen($expr); $i++) {
$s = $expr[$i];
if ($s === '(') {
$lvl++; continue;
} else if ($s === ')') {
$lvl--; continue;
}
if (!isset($str_levels[$lvl]))
$str_levels[$lvl] = '';
$str_levels[$lvl] .= $s;
}
// For each depth: strip a leading OR/AND, turn every remaining OR/AND into a
// temporary separator token, split on that token and trim each piece.
foreach ($str_levels as $key => $val) {
$val = trim($val);
if (strpos($val, 'OR') === 0)
$val = trim(substr($val, 2));
else if (strpos($val, 'AND') === 0)
$val = trim(substr($val, 3));
$val = str_replace('OR', '[[<SEP>]]', $val);
$val = str_replace('AND', '[[<SEP>]]', $val);
$val = explode('[[<SEP>]]', $val);
foreach ($val as &$v)
$v = trim($v);
$str_levels[$key] = $val;
}
// Second body: turn newlines into spaces and collapse whitespace runs in the
// expression itself, then build a copy with parentheses replaced by spaces
// and split that copy on OR, or on AND optionally followed by " NOT".
$expr = preg_replace('/\n/', ' ', $expr);
$expr = preg_replace('/\s+/', ' ', $expr);
$p_expr = preg_replace('/\(|\)/', ' ', $expr);
$p_expr = preg_replace('/\s+/', ' ', $p_expr);
$p_expr = preg_split('/OR|AND(\sNOT)?/', $p_expr);
// Trim every term in place (by-reference loop variable).
foreach ($p_expr as &$e) $e = trim($e);
// Result: the expression text under 'original', the split terms under
// 'parsed' (see the review note at the top about the duplicated key).
return [
'original' => $expr,
'parsed' => $str_levels
'parsed' => $p_expr,
];
}
@ -448,30 +427,66 @@ function matches_comparing_expression($expr, $text) {
$expr = parse_comparing_expression($expr);
$result = $expr['original'];
$text_words = explode(' ', $text);
foreach ($expr['parsed'] as $lvl => $exl) {
foreach ($exl as &$t) {
if (strpos($t, 'words ') === 0) {
$w = substr($t, strlen('words')+1);
$ws = explode(' ', trim(trim($w, '"')));
$haveall = true;
foreach ($expr['parsed'] as $t)
{
$bool = false;
$t = trim($t);
$selector = substr($t, 0, strpos($t, ' '));
$content = substr($t, strpos($t, ' ')+1);
$content = trim(trim(trim($content), '"'));
switch ($selector)
{
case 'words':
case 'hasall':
$ws = explode(' ', $content);
$hasall = true;
foreach ($ws as $w) {
if (!in_array(normalize_for_search($w), $text_words)) {
$haveall = false;
$hasall = false;
break;
}
}
$result = str_replace($t, $haveall ? 'true' : 'false', $result);
} else if (strpos($t, 'contains ') === 0) {
$w = substr($t, strlen('contains')+1);
$w = trim(trim($w, '"'));
$contains = strpos(' '.$text.' ',
' '.normalize_for_search($w).' ') !== false;
$result = str_replace($t, $contains ? 'true' : 'false', $result);
$bool = $hasall;
break;
case 'anyword':
case 'hasany':
$ws = explode(' ', $content);
$hasany = false;
foreach ($ws as $w) {
if (in_array(normalize_for_search($w), $text_words)) {
$hasany = true;
break;
}
}
$bool = $hasany;
break;
case 'has':
case 'includes':
case 'contains':
$bool = strpos(' '.$text.' ',
' '.normalize_for_search($content).' ') !== false;
break;
case 'regex':
case 'matches':
if (strlen($content) <= 0)
continue 2;
if ($content[0] != '^')
$content = '^.*'.$content;
if ($content[strlen($content)-1] != '$')
$content .= '.*$';
$bool = preg_match('#'.$content.'#', $text) === 1;
break;
}
$result = str_replace('OR', '|', $result);
$result = str_replace('AND', '&', $result);
$result = str_replace($t, $bool ? 'true' : 'false', $result);
}
$result = str_replace('OR', '||', $result);
$result = str_replace('AND', '&&', $result);
$result = str_replace('NOT ', '!', $result);
$result = str_replace('NOT', '!', $result);
try {
eval('$result = '.$result.';');
} catch (\Throwable $e) {
@ -559,14 +574,12 @@ function cronjob_db_create($software, $instance, $sql, $time=3600) {
if (!file_exists($dir_crons_db))
mkdir($dir_crons_db);
$cron_file = $software.','.$instance.','.$job_key;
$result_file = $dir_crons_db.'/'.$cron_file.','.$time;
$touch_1970 = !file_exists($result_file);
foreach (scandir($dir_crons_db) as $fl) {
if (strpos($fl, $cron_file) !== false)
unlink($dir_crons_db.'/'.$fl);
}
$touch_1970 = false;
$result_file = $dir_crons_db.'/'.$cron_file.','.$time;
if (!file_exists($result_file))
$touch_1970 = true;
file_put_contents($result_file, $sql);
if ($touch_1970) touch($result_file, 1000);
return $cron_file;

View File

@ -37,39 +37,59 @@ window.view.instance = {
},
// Wraps the parts of `html` that match the search in <span class="sr">
// elements and returns the resulting markup.
//
// NOTE(review): this span looks like a diff hunk whose +/- markers were lost.
// Two versions of the body appear interleaved (the 'expr:' check and the
// `sxs` declaration each occur twice, `sx` is declared with const and then
// reassigned, and an `else` branch runs into the `switch` cases), so the
// braces do not pair up as written. Which lines are the removed ones and
// which are the added ones should be confirmed against the repository before
// any of this is changed.
html_add_search_spans: function(html, search) {
search = search.trim();
if (search.startsWith('expr:'))
{
// Pieces of the markup split at word boundaries; these are the candidates
// that get compared against the searched words below.
const hwords = html.split(/\b/);
if (search.startsWith('expr:')) {
// Drop the 'expr:' prefix (5 characters) before parsing the expression.
search = search.substr(5).trim();
var sxs = search.replaceAll('(', '').replaceAll(')','').split(/OR|AND/);
for (var i = 0; i < sxs.length; i++) {
var sx = sxs[i].trim();
// Flatten the expression: newlines become spaces, parentheses are removed,
// whitespace runs collapse to one space, NOT becomes '!', and the result is
// split into terms on OR / AND.
var sxs = search
.replaceAll('\n', ' ')
.replaceAll('(', '')
.replaceAll(')','')
.replaceAll(/\s+/g, ' ')
.replaceAll('NOT', '!')
.split(/OR|AND/);
for (var i = 0; i < sxs.length; i++)
{
const sx = sxs[i].trim();
// Terms that start with '!' (a rewritten NOT) are skipped.
if (sx.startsWith('!'))
continue;
// Keep only the text between the first pair of double quotes, split on spaces.
sx = sx.substr(sx.indexOf('"')+1);
sx = sx.substr(0, sx.indexOf('"'));
sx = sx.trim().split(' ');
for (var j = 0; j < sx.length; j++) {
const w = sx[j];
// Selector is the text before the first space; the rest is the content,
// which is passed through normalize_for_search and split into words.
const selector = sx.substr(0, sx.indexOf(' ')).trim();
var content = sx.substr(sx.indexOf(' ')+1).trim();
content = normalize_for_search(content);
const text_words = content.split(' ');
switch (selector) {
case 'words':
case 'hasall':
case 'anyword':
case 'hasany':
for (var j = 0; j < text_words.length; j++) {
const w = text_words[j];
for (var k = 0; k < hwords.length; k++) {
const h = hwords[k];
// Two matching rules follow back to back: a substring test on the normalized
// forms with a plain replaceAll, then an exact test on the normalized piece
// with a word-bounded regex replace. The piece `h` is placed into the RegExp
// source without escaping.
if (normalize_for_search(h).includes(normalize_for_search(w)))
html = html.replaceAll(h, '<span class="sr">'+h+'</span>');
if (normalize_for_search(h) === w)
html = html.replace(new RegExp("\\b"+h+"\\b", 'g'),
'<span class="sr">'+h+'</span>');
}
}
}
} else {
// Plain (non-'expr:') search: lower-case it, split on spaces and wrap every
// piece of the markup whose normalized form contains a normalized search word.
search = search.toLowerCase();
const words = search.split(' ');
for (var i = 0; i < words.length; i++) {
const w = words[i];
for (var j = 0; j < hwords.length; j++) {
const h = hwords[j];
if (normalize_for_search(h).includes(normalize_for_search(w)))
html = html.replaceAll(h, '<span class="sr">'+h+'</span>');
}
break;
case 'has':
case 'includes':
case 'contains':
// TODO: implement contains "sr"
break;
case 'regex':
// TODO: implement regex "sr"
break;
}
}
return html;
}
// The literal search '<empty>' leaves the markup untouched.
if (search === '<empty>')
return html;
// Otherwise strip double quotes and re-enter this function with the search
// rewritten as an expression using the 'words' selector.
search = search.replaceAll('"', '');
return window.view.instance.html_add_search_spans(html, `expr: words "${search}"`);
},
do: {
users: {