Implemented more complex expressions such as "regex" + improvements on matches
This commit is contained in:
parent
e99c1bfc89
commit
3e1f3d01e0
117
base.php
117
base.php
|
@ -380,13 +380,12 @@ if (!function_exists('str_starts_with')) {
|
||||||
|
|
||||||
function normalize_for_search($str) {
|
function normalize_for_search($str) {
|
||||||
if (trim($str) === '') return '';
|
if (trim($str) === '') return '';
|
||||||
$str = preg_replace('/\s\s+/', ' ', trim($str));
|
$str = strtolower(remove_accents(trim($str)));
|
||||||
$words = explode(' ', $str);
|
$str = preg_replace('/[^a-z0-9]+/', ' ', $str);
|
||||||
|
$str = preg_replace('/\s+/', ' ', $str);
|
||||||
|
$words = explode(' ', trim($str));
|
||||||
$newwords = [];
|
$newwords = [];
|
||||||
foreach ($words as $word) {
|
foreach ($words as $word) {
|
||||||
$word = remove_accents($word);
|
|
||||||
$word = strtolower($word);
|
|
||||||
$word = preg_replace('/[^a-z0-9]+/', '', $word);
|
|
||||||
if (trim($word) === '')
|
if (trim($word) === '')
|
||||||
continue;
|
continue;
|
||||||
$word = str_replace('4', 'a', $word);
|
$word = str_replace('4', 'a', $word);
|
||||||
|
@ -411,35 +410,15 @@ function normalize_for_search($str) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function parse_comparing_expression($expr) {
|
function parse_comparing_expression($expr) {
|
||||||
$lvl = 0;
|
$expr = preg_replace('/\n/', ' ', $expr);
|
||||||
$str_levels = [];
|
$expr = preg_replace('/\s+/', ' ', $expr);
|
||||||
for ($i = 0; $i < strlen($expr); $i++) {
|
$p_expr = preg_replace('/\(|\)/', ' ', $expr);
|
||||||
$s = $expr[$i];
|
$p_expr = preg_replace('/\s+/', ' ', $p_expr);
|
||||||
if ($s === '(') {
|
$p_expr = preg_split('/OR|AND(\sNOT)?/', $p_expr);
|
||||||
$lvl++; continue;
|
foreach ($p_expr as &$e) $e = trim($e);
|
||||||
} else if ($s === ')') {
|
|
||||||
$lvl--; continue;
|
|
||||||
}
|
|
||||||
if (!isset($str_levels[$lvl]))
|
|
||||||
$str_levels[$lvl] = '';
|
|
||||||
$str_levels[$lvl] .= $s;
|
|
||||||
}
|
|
||||||
foreach ($str_levels as $key => $val) {
|
|
||||||
$val = trim($val);
|
|
||||||
if (strpos($val, 'OR') === 0)
|
|
||||||
$val = trim(substr($val, 2));
|
|
||||||
else if (strpos($val, 'AND') === 0)
|
|
||||||
$val = trim(substr($val, 3));
|
|
||||||
$val = str_replace('OR', '[[<SEP>]]', $val);
|
|
||||||
$val = str_replace('AND', '[[<SEP>]]', $val);
|
|
||||||
$val = explode('[[<SEP>]]', $val);
|
|
||||||
foreach ($val as &$v)
|
|
||||||
$v = trim($v);
|
|
||||||
$str_levels[$key] = $val;
|
|
||||||
}
|
|
||||||
return [
|
return [
|
||||||
'original' => $expr,
|
'original' => $expr,
|
||||||
'parsed' => $str_levels
|
'parsed' => $p_expr,
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -448,30 +427,66 @@ function matches_comparing_expression($expr, $text) {
|
||||||
$expr = parse_comparing_expression($expr);
|
$expr = parse_comparing_expression($expr);
|
||||||
$result = $expr['original'];
|
$result = $expr['original'];
|
||||||
$text_words = explode(' ', $text);
|
$text_words = explode(' ', $text);
|
||||||
foreach ($expr['parsed'] as $lvl => $exl) {
|
foreach ($expr['parsed'] as $t)
|
||||||
foreach ($exl as &$t) {
|
{
|
||||||
if (strpos($t, 'words ') === 0) {
|
$bool = false;
|
||||||
$w = substr($t, strlen('words')+1);
|
$t = trim($t);
|
||||||
$ws = explode(' ', trim(trim($w, '"')));
|
$selector = substr($t, 0, strpos($t, ' '));
|
||||||
$haveall = true;
|
$content = substr($t, strpos($t, ' ')+1);
|
||||||
|
$content = trim(trim(trim($content), '"'));
|
||||||
|
switch ($selector)
|
||||||
|
{
|
||||||
|
case 'words':
|
||||||
|
case 'hasall':
|
||||||
|
$ws = explode(' ', $content);
|
||||||
|
$hasall = true;
|
||||||
foreach ($ws as $w) {
|
foreach ($ws as $w) {
|
||||||
if (!in_array(normalize_for_search($w), $text_words)) {
|
if (!in_array(normalize_for_search($w), $text_words)) {
|
||||||
$haveall = false;
|
$hasall = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$result = str_replace($t, $haveall ? 'true' : 'false', $result);
|
$bool = $hasall;
|
||||||
} else if (strpos($t, 'contains ') === 0) {
|
break;
|
||||||
$w = substr($t, strlen('contains')+1);
|
|
||||||
$w = trim(trim($w, '"'));
|
case 'anyword':
|
||||||
$contains = strpos(' '.$text.' ',
|
case 'hasany':
|
||||||
' '.normalize_for_search($w).' ') !== false;
|
$ws = explode(' ', $content);
|
||||||
$result = str_replace($t, $contains ? 'true' : 'false', $result);
|
$hasany = false;
|
||||||
|
foreach ($ws as $w) {
|
||||||
|
if (in_array(normalize_for_search($w), $text_words)) {
|
||||||
|
$hasany = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
$bool = $hasany;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'has':
|
||||||
|
case 'includes':
|
||||||
|
case 'contains':
|
||||||
|
$bool = strpos(' '.$text.' ',
|
||||||
|
' '.normalize_for_search($content).' ') !== false;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'regex':
|
||||||
|
case 'matches':
|
||||||
|
if (strlen($content) <= 0)
|
||||||
|
continue 2;
|
||||||
|
if ($content[0] != '^')
|
||||||
|
$content = '^.*'.$content;
|
||||||
|
if ($content[strlen($content)-1] != '$')
|
||||||
|
$content .= '.*$';
|
||||||
|
$bool = preg_match('#'.$content.'#', $text) === 1;
|
||||||
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
$result = str_replace('OR', '|', $result);
|
$result = str_replace($t, $bool ? 'true' : 'false', $result);
|
||||||
$result = str_replace('AND', '&', $result);
|
}
|
||||||
|
$result = str_replace('OR', '||', $result);
|
||||||
|
$result = str_replace('AND', '&&', $result);
|
||||||
|
$result = str_replace('NOT ', '!', $result);
|
||||||
|
$result = str_replace('NOT', '!', $result);
|
||||||
try {
|
try {
|
||||||
eval('$result = '.$result.';');
|
eval('$result = '.$result.';');
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
|
@ -559,14 +574,12 @@ function cronjob_db_create($software, $instance, $sql, $time=3600) {
|
||||||
if (!file_exists($dir_crons_db))
|
if (!file_exists($dir_crons_db))
|
||||||
mkdir($dir_crons_db);
|
mkdir($dir_crons_db);
|
||||||
$cron_file = $software.','.$instance.','.$job_key;
|
$cron_file = $software.','.$instance.','.$job_key;
|
||||||
|
$result_file = $dir_crons_db.'/'.$cron_file.','.$time;
|
||||||
|
$touch_1970 = !file_exists($result_file);
|
||||||
foreach (scandir($dir_crons_db) as $fl) {
|
foreach (scandir($dir_crons_db) as $fl) {
|
||||||
if (strpos($fl, $cron_file) !== false)
|
if (strpos($fl, $cron_file) !== false)
|
||||||
unlink($dir_crons_db.'/'.$fl);
|
unlink($dir_crons_db.'/'.$fl);
|
||||||
}
|
}
|
||||||
$touch_1970 = false;
|
|
||||||
$result_file = $dir_crons_db.'/'.$cron_file.','.$time;
|
|
||||||
if (!file_exists($result_file))
|
|
||||||
$touch_1970 = true;
|
|
||||||
file_put_contents($result_file, $sql);
|
file_put_contents($result_file, $sql);
|
||||||
if ($touch_1970) touch($result_file, 1000);
|
if ($touch_1970) touch($result_file, 1000);
|
||||||
return $cron_file;
|
return $cron_file;
|
||||||
|
|
|
@ -37,39 +37,59 @@ window.view.instance = {
|
||||||
},
|
},
|
||||||
html_add_search_spans: function(html, search) {
|
html_add_search_spans: function(html, search) {
|
||||||
search = search.trim();
|
search = search.trim();
|
||||||
|
if (search.startsWith('expr:'))
|
||||||
|
{
|
||||||
const hwords = html.split(/\b/);
|
const hwords = html.split(/\b/);
|
||||||
if (search.startsWith('expr:')) {
|
|
||||||
search = search.substr(5).trim();
|
search = search.substr(5).trim();
|
||||||
var sxs = search.replaceAll('(', '').replaceAll(')','').split(/OR|AND/);
|
var sxs = search
|
||||||
for (var i = 0; i < sxs.length; i++) {
|
.replaceAll('\n', ' ')
|
||||||
var sx = sxs[i].trim();
|
.replaceAll('(', '')
|
||||||
|
.replaceAll(')','')
|
||||||
|
.replaceAll(/\s+/g, ' ')
|
||||||
|
.replaceAll('NOT', '!')
|
||||||
|
.split(/OR|AND/);
|
||||||
|
for (var i = 0; i < sxs.length; i++)
|
||||||
|
{
|
||||||
|
const sx = sxs[i].trim();
|
||||||
if (sx.startsWith('!'))
|
if (sx.startsWith('!'))
|
||||||
continue;
|
continue;
|
||||||
sx = sx.substr(sx.indexOf('"')+1);
|
const selector = sx.substr(0, sx.indexOf(' ')).trim();
|
||||||
sx = sx.substr(0, sx.indexOf('"'));
|
var content = sx.substr(sx.indexOf(' ')+1).trim();
|
||||||
sx = sx.trim().split(' ');
|
content = normalize_for_search(content);
|
||||||
for (var j = 0; j < sx.length; j++) {
|
const text_words = content.split(' ');
|
||||||
const w = sx[j];
|
switch (selector) {
|
||||||
|
case 'words':
|
||||||
|
case 'hasall':
|
||||||
|
case 'anyword':
|
||||||
|
case 'hasany':
|
||||||
|
for (var j = 0; j < text_words.length; j++) {
|
||||||
|
const w = text_words[j];
|
||||||
for (var k = 0; k < hwords.length; k++) {
|
for (var k = 0; k < hwords.length; k++) {
|
||||||
const h = hwords[k];
|
const h = hwords[k];
|
||||||
if (normalize_for_search(h).includes(normalize_for_search(w)))
|
if (normalize_for_search(h) === w)
|
||||||
html = html.replaceAll(h, '<span class="sr">'+h+'</span>');
|
html = html.replace(new RegExp("\\b"+h+"\\b", 'g'),
|
||||||
|
'<span class="sr">'+h+'</span>');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
break;
|
||||||
} else {
|
case 'has':
|
||||||
search = search.toLowerCase();
|
case 'includes':
|
||||||
const words = search.split(' ');
|
case 'contains':
|
||||||
for (var i = 0; i < words.length; i++) {
|
// TODO: implement contains "sr"
|
||||||
const w = words[i];
|
break;
|
||||||
for (var j = 0; j < hwords.length; j++) {
|
case 'regex':
|
||||||
const h = hwords[j];
|
// TODO: implement regex "sr"
|
||||||
if (normalize_for_search(h).includes(normalize_for_search(w)))
|
break;
|
||||||
html = html.replaceAll(h, '<span class="sr">'+h+'</span>');
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return html;
|
return html;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (search === '<empty>')
|
||||||
|
return html;
|
||||||
|
|
||||||
|
search = search.replaceAll('"', '');
|
||||||
|
return window.view.instance.html_add_search_spans(html, `expr: words "${search}"`);
|
||||||
},
|
},
|
||||||
do: {
|
do: {
|
||||||
users: {
|
users: {
|
||||||
|
|
Loading…
Reference in New Issue