Support word sound normalization in regex searches
* For example, a single (crypto|nft) regex will now also match "cripto" or "cr1pto".
This commit is contained in:
parent
05b8d6e6da
commit
d8170d81d8
|
@ -192,7 +192,7 @@ if (isset($_GET['profile']) && trim($_GET['profile']) != '')
|
|||
}
|
||||
}
|
||||
} else if ($qt === 'expr')
|
||||
$matches = matches_comparing_expression($q, normalize_for_search($a_note));
|
||||
$matches = matches_comparing_expression($q, $a_note);
|
||||
|
||||
if ($matches) {
|
||||
$filtered_accounts []= $account['id'];
|
||||
|
|
68
base.php
68
base.php
|
@ -378,9 +378,29 @@ if (!function_exists('str_starts_with')) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Return a copy of $str in which the single byte at offset $i is replaced by $c.
 *
 * Works on byte offsets (substr), and $c may be longer than one byte, in
 * which case the returned string is longer than the input.
 *
 * @param string $str Source string.
 * @param int    $i   Zero-based byte offset of the character to replace.
 * @param string $c   Replacement text spliced in at that offset.
 * @return string The resulting string.
 */
function set_chat_at($str, $i, $c) {
    $before = substr($str, 0, $i);
    $after = substr($str, $i + 1);
    return $before.$c.$after;
}
|
||||
|
||||
/**
 * Apply the fixed list of look-alike / sound-alike substitutions to $word.
 *
 * Each rule is a "from:to" pair and the rules are applied one after another,
 * in list order, each on the output of the previous one. Order matters:
 * 'b:v' runs before '8:b', so a literal '8' ends up as 'b' while a
 * literal 'b' ends up as 'v'.
 *
 * @param string        $word  Text to normalize.
 * @param callable|null $cback Optional replacer. When given, it is called once
 *                             per rule as $cback($word, [$from, $to]) and its
 *                             return value becomes the new $word; the built-in
 *                             str_replace is not applied for that rule.
 * @return string The text after every rule has been processed.
 */
function normalize_word_sound($word, $cback=null) {
    $rules = [
        '4:a', '3:e', '1:i', '0:o',
        '5:s', 'b:v', '8:b', 'k:c',
        'y:i', 'que:kee', 'q:k',
    ];
    $has_callback = ($cback !== null);
    foreach ($rules as $rule) {
        // $pair[0] is the text to find, $pair[1] its replacement.
        $pair = explode(':', $rule);
        if (!$has_callback) {
            $word = str_replace($pair[0], $pair[1], $word);
            continue;
        }
        $word = $cback($word, $pair);
    }
    return $word;
}
|
||||
|
||||
function normalize_for_search($str) {
|
||||
if (trim($str) === '') return '';
|
||||
$str = strtolower(remove_accents(trim($str)));
|
||||
$str = strip_tags(trim($str));
|
||||
$str = strtolower(remove_accents($str));
|
||||
$str = preg_replace('/[^a-z0-9]+/', ' ', $str);
|
||||
$str = preg_replace('/\s+/', ' ', $str);
|
||||
$words = explode(' ', trim($str));
|
||||
|
@ -388,15 +408,7 @@ function normalize_for_search($str) {
|
|||
foreach ($words as $word) {
|
||||
if (trim($word) === '')
|
||||
continue;
|
||||
$word = str_replace('4', 'a', $word);
|
||||
$word = str_replace('3', 'e', $word);
|
||||
$word = str_replace('1', 'i', $word);
|
||||
$word = str_replace('0', 'o', $word);
|
||||
$word = str_replace('5', 's', $word);
|
||||
$word = str_replace('b', 'v', $word);
|
||||
$word = str_replace('8', 'b', $word);
|
||||
$word = str_replace('k', 'c', $word);
|
||||
$word = str_replace('que', 'kee', $word);
|
||||
$word = normalize_word_sound($word);
|
||||
$nword = '';
|
||||
for ($i = 0; $i < strlen($word); $i++) {
|
||||
if ($i === 0) {
|
||||
|
@ -415,9 +427,25 @@ function normalize_for_search($str) {
|
|||
function parse_comparing_expression($expr) {
|
||||
$expr = preg_replace('/\n/', ' ', $expr);
|
||||
$expr = preg_replace('/\s+/', ' ', $expr);
|
||||
$p_expr = preg_replace('/\(|\)/', ' ', $expr);
|
||||
$p_expr = $expr;
|
||||
$quot = false;
|
||||
for ($i = 0; $i < strlen($p_expr); $i++) {
|
||||
if ($p_expr[$i] === '"') {
|
||||
$quot = !$quot;
|
||||
continue;
|
||||
}
|
||||
if ($quot) {
|
||||
if ($p_expr[$i] === '(')
|
||||
$p_expr = set_chat_at($p_expr, $i, 'º');
|
||||
else if ($p_expr[$i] === ')')
|
||||
$p_expr = set_chat_at($p_expr, $i, 'ª');
|
||||
}
|
||||
}
|
||||
$p_expr = preg_replace('/\(|\)/', ' ', $p_expr);
|
||||
$p_expr = preg_replace('/\s+/', ' ', $p_expr);
|
||||
$p_expr = preg_split('/OR|AND(\sNOT)?/', $p_expr);
|
||||
$p_expr = str_replace('º', '(', $p_expr);
|
||||
$p_expr = str_replace('ª', ')', $p_expr);
|
||||
foreach ($p_expr as &$e) $e = trim($e);
|
||||
return [
|
||||
'original' => $expr,
|
||||
|
@ -429,6 +457,7 @@ function matches_comparing_expression($expr, $text) {
|
|||
if (gettype($expr) === 'string')
|
||||
$expr = parse_comparing_expression($expr);
|
||||
$result = $expr['original'];
|
||||
$text = normalize_for_search($text);
|
||||
$text_words = explode(' ', $text);
|
||||
foreach ($expr['parsed'] as $t)
|
||||
{
|
||||
|
@ -478,6 +507,23 @@ function matches_comparing_expression($expr, $text) {
|
|||
case 'matches':
|
||||
if (strlen($content) <= 0)
|
||||
continue 2;
|
||||
$content = normalize_word_sound($content, function($w, $h) {
|
||||
if ($h[0] === 'b')
|
||||
return preg_replace('/([^\\\])b/', '\1'.$h[1], $w);
|
||||
if (preg_match('/^\d$/', $h[0])) {
|
||||
$b = false;
|
||||
for ($i = 0; $i < strlen($w); $i++) {
|
||||
if (in_array($w[$i], ['{','}','[',']'])) {
|
||||
$b = !$b;
|
||||
continue;
|
||||
}
|
||||
if (!$b && $w[$i] === $h[0])
|
||||
$w = set_chat_at($w, $i, $h[1]);
|
||||
}
|
||||
return $w;
|
||||
}
|
||||
return str_replace($h[0], $h[1], $w);
|
||||
});
|
||||
if ($content[0] != '^')
|
||||
$content = '^.*'.$content;
|
||||
if ($content[strlen($content)-1] != '$')
|
||||
|
|
22
js/base.php
22
js/base.php
|
@ -139,16 +139,18 @@ function insert_string(main_string, ins_string, pos) {
|
|||
return main_string.slice(0, pos) + ins_string + main_string.slice(pos);
|
||||
}
|
||||
|
||||
function normalize_word_sound(word) {
|
||||
word = word.replaceAll('4','a');
|
||||
word = word.replaceAll('3','e');
|
||||
word = word.replaceAll('1','i');
|
||||
word = word.replaceAll('0','o');
|
||||
word = word.replaceAll('5','s');
|
||||
word = word.replaceAll('b','v');
|
||||
word = word.replaceAll('8','b');
|
||||
word = word.replaceAll('k','c');
|
||||
word = word.replaceAll('que','kee');
|
||||
/**
 * Apply the fixed list of look-alike / sound-alike substitutions to `word`.
 *
 * Each rule is a "from:to" pair; rules are applied sequentially in list
 * order, each on the output of the previous one. Order matters: 'b:v' runs
 * before '8:b', so a literal '8' ends up as 'b' while a literal 'b' ends up
 * as 'v'. The rule list mirrors the PHP function of the same name shown
 * alongside it.
 *
 * @param {string} word - Text to normalize.
 * @param {?function(string, string[]): string} [cback] - Optional replacer.
 *   When provided, it is called once per rule as `cback(word, [from, to])`
 *   and its return value becomes the new word. `null` and `undefined` both
 *   mean "no callback" (plain `replaceAll` is used).
 * @returns {string} The text after every rule has been processed.
 */
function normalize_word_sound(word, cback) {
    const hashes = [
        '4:a', '3:e', '1:i', '0:o',
        '5:s', 'b:v', '8:b', 'k:c',
        'y:i', 'que:kee', 'q:k',
    ];
    // `== null` matches both null and undefined, so an explicit null no
    // longer reaches the call site and throws.
    const useDefault = cback == null;
    let result = word;
    for (const entry of hashes) {
        const hash = entry.split(':');
        result = useDefault
            ? result.replaceAll(hash[0], hash[1])
            : cback(result, hash);
    }
    return result;
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue