以前在 .NET 里写过一个高效替换字符串的类。
后来因为 PHP 也要用到类似功能，所以重写了一个 PHP 版本；但是 PHP 没有类似字典（Dictionary）的高效数据存储方式，所以性能还有优化空间。
暂时贴在这里备忘。
原理是使用字典树（Trie）这种树形数据结构。
#region 高效字符串替换类
/**
 * A single node of the keyword trie.
 *
 * Each node keeps its children in an associative array keyed by one
 * character, plus a flag telling whether a complete keyword ends here.
 */
class TrieNode
{
    /** @var bool True when a complete keyword terminates at this node. */
    public $m_end;

    /** @var array Child nodes, keyed by the character that leads to them. */
    public $m_values;

    /**
     * Creates a node with no children that does not terminate a keyword.
     */
    public function __construct()
    {
        $this->m_values = [];
        $this->m_end = false;
    }

    /**
     * Looks up the child stored under $key.
     *
     * @param mixed    $key  Character (array key) to look up.
     * @param TrieNode $node Out-parameter: receives the child when it exists,
     *                       otherwise a brand-new, unattached TrieNode.
     * @return bool True when a child exists for $key, false otherwise.
     */
    public function TryGetValue($key, &$node)
    {
        $found = array_key_exists($key, $this->m_values);
        // On a miss the caller still receives a usable (but detached) node.
        $node = $found ? $this->m_values[$key] : new TrieNode();

        return $found;
    }
}
/**
 * Trie-backed multi-keyword replacer.
 *
 * The instance itself is the root of the trie. Every keyword handed to the
 * constructor is inserted one UTF-8 character at a time, so Replace() can find
 * all keywords in a single left-to-right scan of the text.
 */
class TrieFilter extends TrieNode
{
    /** @var bool When true, keywords and text are lower-cased before comparison. */
    private $ignorecase = false;

    /**
     * @param array $keys   Keywords to index. Empty strings are skipped.
     * @param bool  $isgore When true, matching is case-insensitive.
     */
    public function __construct($keys, $isgore = false)
    {
        parent::__construct();
        $this->ignorecase = $isgore;
        $this->AddKey($keys);
    }

    /**
     * Inserts every keyword into the trie.
     *
     * @param array $keys Keywords to insert.
     * @return void
     */
    private function AddKey($keys)
    {
        foreach ($keys as $key) {
            // Numeric-looking array keys arrive as integers; normalise to string.
            $key = (string) $key;
            if ($key === '') {
                // Skip only this entry; the remaining keywords must still be indexed.
                continue;
            }

            $node = $this;
            foreach (self::SplitChars($key) as $char) {
                $c = $this->GetChar($char);
                if (!isset($node->m_values[$c])) {
                    $node->m_values[$c] = new TrieNode();
                }
                $node = $node->m_values[$c];
            }

            // An unsplittable (invalid UTF-8) key leaves $node at the root;
            // never mark the root itself as a keyword end.
            if ($node !== $this) {
                $node->m_end = true;
            }
        }
    }

    /**
     * Normalises one character for comparison.
     *
     * @param string $car A single character.
     * @return string The character, lower-cased when case is ignored.
     */
    private function GetChar($car)
    {
        if ($this->ignorecase) {
            // Multibyte-aware folding; byte-wise strtolower() is not UTF-8 safe
            // under every locale on older PHP versions.
            return mb_strtolower($car, 'UTF-8');
        }

        return $car;
    }

    /**
     * Replaces every indexed keyword found in $text using the dictionary $d.
     *
     * The text is scanned left to right. At each position the longest indexed
     * keyword starting there is chosen. A replaced keyword is consumed whole,
     * so replacements never overlap. A keyword that is not replaced (no usable
     * dictionary entry, already used up, or excluded by the HTML check) is
     * left as is and scanning resumes at the next character.
     *
     * @param string $text        Text to process (UTF-8).
     * @param array  $d           Map keyword => replacement. Entries whose
     *                            replacement is an empty string are ignored.
     * @param bool   $onlyone     Replace each keyword at most once.
     * @param bool   $excludehtml Leave a match untouched when the text before
     *                            it contains an unclosed "<a" or an unclosed
     *                            "<" (i.e. the match sits inside an anchor
     *                            element or inside a tag).
     * @return string The processed text; $text unchanged when it cannot be
     *                split into UTF-8 characters.
     */
    public function Replace($text, $d, $onlyone = true, $excludehtml = true)
    {
        $chars = self::SplitChars((string) $text);
        $length = count($chars);
        if ($length === 0) {
            return $text;
        }

        $lookup = $this->BuildLookup($d);
        $result = '';
        $i = 0;

        while ($i < $length) {
            // Walk the trie from position $i, remembering the longest keyword seen.
            $node = $this;
            $walked = '';
            $matchLen = 0;
            $matchKey = '';
            for ($j = $i; $j < $length; $j++) {
                $c = $this->GetChar($chars[$j]);
                if (!isset($node->m_values[$c])) {
                    break;
                }
                $node = $node->m_values[$c];
                $walked .= $c;
                if ($node->m_end) {
                    $matchLen = $j - $i + 1;
                    $matchKey = $walked;
                }
            }

            if ($matchLen > 0 && isset($lookup[$matchKey])) {
                $replacement = (string) $lookup[$matchKey];
                if ($replacement !== '' && !($excludehtml && self::IsInsideTag($chars, $i))) {
                    $result .= $replacement;
                    if ($onlyone) {
                        unset($lookup[$matchKey]);
                    }
                    // Consume the whole keyword so replacements cannot overlap.
                    $i += $matchLen;
                    continue;
                }
            }

            $result .= $chars[$i];
            $i++;
        }

        return $result;
    }

    /**
     * Prepares the dictionary for lookups by matched keyword.
     *
     * In case-insensitive mode the dictionary keys are folded exactly like the
     * trie keys, so a match on "ABC" finds the entry registered as "abc". When
     * several entries fold to the same key, the first one wins.
     *
     * @param array $d Map keyword => replacement.
     * @return array Map (possibly folded) keyword => replacement.
     */
    private function BuildLookup($d)
    {
        if (!$this->ignorecase) {
            return $d;
        }

        $folded = [];
        foreach ($d as $key => $value) {
            $foldedKey = implode('', array_map([$this, 'GetChar'], self::SplitChars((string) $key)));
            if (!array_key_exists($foldedKey, $folded)) {
                $folded[$foldedKey] = $value;
            }
        }

        return $folded;
    }

    /**
     * Tells whether position $pos lies inside an anchor element or inside a tag.
     *
     * The test compares, case-insensitively and over the text before $pos, the
     * number of "<a" with the number of "</a>", and the number of "<" with the
     * number of ">". Any imbalance counts as "inside".
     *
     * @param array $chars Characters of the text.
     * @param int   $pos   Index of the first character of the match.
     * @return bool
     */
    private static function IsInsideTag(array $chars, $pos)
    {
        $prefix = strtolower(implode('', array_slice($chars, 0, $pos)));

        return substr_count($prefix, '<a') !== substr_count($prefix, '</a>')
            || substr_count($prefix, '<') !== substr_count($prefix, '>');
    }

    /**
     * Splits a UTF-8 string into an array of characters.
     *
     * @param string $str String to split.
     * @return array List of characters; empty for an empty or invalid UTF-8 string.
     */
    private static function SplitChars($str)
    {
        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);

        return $chars === false ? [] : $chars;
    }
}
/**
 * Counts the case-insensitive occurrences of $find in $value.
 *
 * Overlapping occurrences are counted individually, e.g. "aa" occurs twice
 * in "aaa".
 *
 * @param string $value Text to search in.
 * @param string $find  Text to search for.
 * @return int Number of occurrences; 0 when $find is empty.
 */
function StringCount($value, $find)
{
    $value = mb_strtolower((string) $value);
    $find = mb_strtolower((string) $find);
    if ($find === '') {
        // An empty needle has no meaningful occurrence count.
        return 0;
    }

    $count = 0;
    $offset = 0;
    // mb_strpos compares the strings exactly, so numeric-looking strings such
    // as "100" and "1e2" are never treated as equal.
    while (($pos = mb_strpos($value, $find, $offset)) !== false) {
        $count++;
        // Advance by a single character so overlapping occurrences are found.
        $offset = $pos + 1;
    }

    return $count;
}
/**
 * Splits a UTF-8 string into an array of single characters.
 *
 * @param string $str String to split.
 * @return array|false List of characters; [''] for an empty string; false
 *                     when preg_split() fails (e.g. invalid UTF-8 input).
 */
function strsplit($str)
{
    // \z matches only at the very end of the subject. A plain "$" also matches
    // just before a trailing newline, which would leave that newline glued to
    // the preceding character.
    return preg_split('/(?<!^)(?!\z)/u', $str);
}
/**
 * Returns the current Unix time in milliseconds.
 *
 * @return float Milliseconds since the Unix epoch, rounded to a whole number.
 */
function tmsectime()
{
    // microtime() yields "<fraction> <seconds>" as a single string.
    $parts = explode(' ', microtime());
    $fraction = floatval($parts[0]);
    $seconds = floatval($parts[1]);
    $millis = sprintf('%.0f', ($fraction + $seconds) * 1000);

    return (float) $millis;
}
/**
 * Batch keyword replacement backed by a cached TrieFilter.
 *
 * The trie is built from the keys of $d and stored through cache(), so later
 * calls with the same set of keywords skip the build step. The cache entry
 * name includes a hash of the keyword list, so each distinct dictionary gets
 * its own trie.
 *
 * cache() and CACHE_PATH are not defined in this file.
 * NOTE(review): presumably the ThinkPHP helper and constant — confirm. Entries
 * are written with 'expire' => 0; if that means "never expires", one cache
 * file per distinct keyword list will accumulate.
 *
 * @param string $text        Text to process.
 * @param array  $d           Replacement dictionary, e.g.
 *                            ['张三' => '李四', '王五' => '傻六'] replaces
 *                            张三 with 李四 and 王五 with 傻六.
 * @param bool   $onlyone     Whether each keyword is replaced at most once.
 * @param bool   $excludehtml Forwarded to TrieFilter::Replace(): a match is
 *                            left untouched when the text before it contains
 *                            an unclosed "<a" or an unclosed "<". Intended for
 *                            content that already carries links.
 *                            NOTE(review): the earlier comment said the text
 *                            between <a ...> and </a> is still replaced, but
 *                            the check in Replace() skips it — confirm intent.
 * @return string The processed text.
 */
function replace_batch($text, $d, $onlyone = true, $excludehtml = true)
{
    $cacheoption = [
        'type'   => 'File',
        'path'   => CACHE_PATH,
        'prefix' => '',
        'expire' => 0,
    ];

    $keys = array_keys($d);
    // A fixed cache name would hand the trie built for the first dictionary
    // to every later call, whatever keywords that call asks for.
    $cachekey = 'tf_TrieFilter_' . md5(serialize($keys));

    $tf = cache($cachekey, '', $cacheoption);
    if (!($tf instanceof TrieFilter)) {
        $tf = new TrieFilter($keys);
        cache($cachekey, $tf, $cacheoption);
    }

    return $tf->Replace($text, $d, $onlyone, $excludehtml);
}
#endregion
评论