PHP

PHP高效批量字符串替换类

Elysian
2021-11-17 / 0 评论 / 141 阅读 / 正在检测是否收录...

以前在 .NET 里写过一个高效批量替换字符串的类。
后来 PHP 项目也需要类似功能,所以重写了一个 PHP 版本;但 PHP 没有 .NET 中 Dictionary 那样的专用字典类型(这里用关联数组代替),所以性能还有优化空间。
暂时贴在这里备忘。
原理是使用字典树(Trie)数据结构。


#region 高效字符串替换类



/**
 * A node of the keyword trie.
 *
 * Children live in $m_values, keyed by a single (possibly case-folded)
 * character; $m_end marks a node at which a complete keyword ends.
 */
class TrieNode
{
    /** @var bool True when the path from the root to this node spells a complete keyword. */
    public $m_end;

    /** @var array Child TrieNode instances keyed by character. */
    public $m_values;

    public function __construct()
    {
        $this->m_end = false;
        $this->m_values = [];
    }

    /**
     * Looks up the child stored under $key.
     *
     * @param string   $key  Character to look up.
     * @param TrieNode $node Receives the child on success; on failure it
     *                       receives a fresh, detached, empty node.
     * @return bool True when a child exists for $key.
     */
    public function TryGetValue($key, &$node)
    {
        if (array_key_exists($key, $this->m_values)) {
            $node = $this->m_values[$key];
            return true;
        }
        $node = new TrieNode();
        return false;
    }
}


/**
 * Trie-based batch keyword replacer.
 *
 * The filter itself is the root node of the trie. Keywords are inserted one
 * character (UTF-8 code point) at a time; Replace() then scans a text once,
 * always preferring the longest keyword that starts at the current position.
 */
class TrieFilter extends TrieNode
{
    /** @var bool When true, keywords and text are compared case-insensitively. */
    private $ignorecase = false;

    /**
     * @param array $keys   Keywords to recognise.
     * @param bool  $isgore Ignore case when matching.
     */
    public function __construct($keys, $isgore = false)
    {
        parent::__construct();
        $this->ignorecase = $isgore;
        $this->AddKey($keys);
    }

    /**
     * Inserts every keyword into the trie.
     *
     * Empty keywords (and keywords that are not valid UTF-8) are skipped
     * individually; they do not stop the remaining keywords from being added.
     *
     * @param array $keys Keywords to insert.
     */
    private function AddKey($keys)
    {
        foreach ($keys as $key) {
            // array_keys() turns numeric keys into ints, so normalise to string.
            $key = (string) $key;
            if ($key === '') {
                continue;
            }

            $chars = self::SplitChars($key);
            if ($chars === false) {
                continue;
            }

            $node = $this;
            foreach ($chars as $char) {
                $c = $this->GetChar($char);
                if (!isset($node->m_values[$c])) {
                    $node->m_values[$c] = new TrieNode();
                }
                $node = $node->m_values[$c];
            }
            $node->m_end = true;
        }
    }

    /**
     * Normalises one character for trie lookups.
     *
     * @param string $car A single character.
     * @return string The lower-cased character when ignoring case, otherwise $car unchanged.
     */
    private function GetChar($car)
    {
        if ($this->ignorecase) {
            // Multibyte-aware so non-ASCII letters are folded as well.
            return mb_strtolower($car, 'UTF-8');
        }
        return $car;
    }

    /**
     * Splits a string into UTF-8 characters.
     *
     * @param string $str
     * @return array|false List of characters, or false when $str is not valid UTF-8.
     */
    private static function SplitChars($str)
    {
        return preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Applies GetChar() to every character of $str.
     *
     * @param string $str
     * @return string The normalised string; $str itself when case is not ignored.
     */
    private function Fold($str)
    {
        if (!$this->ignorecase) {
            return $str;
        }
        $chars = self::SplitChars($str);
        if ($chars === false) {
            return $str;
        }
        $folded = '';
        foreach ($chars as $char) {
            $folded .= $this->GetChar($char);
        }
        return $folded;
    }

    /**
     * Tells whether the position right after the first $byteLen bytes lies
     * inside an unclosed tag or inside an <a>...</a> element.
     *
     * @param string $lowerText The whole text, ASCII-lower-cased.
     * @param int    $byteLen   Byte length of the prefix preceding the match.
     * @return bool
     */
    private static function InsideMarkup($lowerText, $byteLen)
    {
        if ($byteLen <= 0) {
            return false;
        }
        $prefix = substr($lowerText, 0, $byteLen);

        return substr_count($prefix, '<a') !== substr_count($prefix, '</a>')
            || substr_count($prefix, '<') !== substr_count($prefix, '>');
    }

    /**
     * Replaces keywords found in $text with their values from $d.
     *
     * At each position the longest keyword known to the trie is taken; after a
     * keyword has been recognised the scan resumes behind it, so matches never
     * overlap. A recognised keyword is left as-is when $d has no non-empty
     * value for it, when it was already replaced once and $onlyone is set, or
     * when $excludehtml is set and it sits inside markup.
     *
     * @param string $text        Text to process.
     * @param array  $d           Map of keyword => replacement.
     * @param bool   $onlyone     Replace each keyword at most once.
     * @param bool   $excludehtml Leave keywords inside a tag or inside <a>...</a> untouched.
     * @return string The processed text ($text unchanged when it is not valid UTF-8).
     */
    public function Replace($text, $d, $onlyone = true, $excludehtml = true)
    {
        $source = (string) $text;
        $chars = self::SplitChars($source);
        if ($chars === false) {
            return $text;
        }

        // When ignoring case the text may spell a keyword differently from the
        // dictionary, so keep a second view of $d keyed by the folded keyword.
        $folded = [];
        if ($this->ignorecase) {
            foreach ($d as $dictKey => $dictValue) {
                $folded[$this->Fold((string) $dictKey)] = $dictValue;
            }
        }

        // Markup detection always looks at the ORIGINAL text, never at the
        // output, because replacements may themselves contain tags.
        $lower = $excludehtml ? strtolower($source) : '';

        $length = count($chars);
        $used = [];    // keywords already replaced (only filled when $onlyone)
        $out = '';
        $pos = 0;      // character index into $chars
        $bytePos = 0;  // byte offset of $pos inside $source

        while ($pos < $length) {
            // Walk the trie as far as possible, remembering the last keyword end.
            $node = $this;
            $end = -1;
            for ($j = $pos; $j < $length; $j++) {
                $c = $this->GetChar($chars[$j]);
                if (!isset($node->m_values[$c])) {
                    break;
                }
                $node = $node->m_values[$c];
                if ($node->m_end) {
                    $end = $j;
                }
            }

            if ($end < 0) {
                $out .= $chars[$pos];
                $bytePos += strlen($chars[$pos]);
                $pos++;
                continue;
            }

            $word = implode('', array_slice($chars, $pos, $end - $pos + 1));
            $usedKey = $this->Fold($word);

            $value = '';
            if (!isset($used[$usedKey])) {
                if (isset($d[$word])) {
                    $value = (string) $d[$word];
                } elseif (isset($folded[$usedKey])) {
                    $value = (string) $folded[$usedKey];
                }
            }

            if ($value !== '' && !($excludehtml && self::InsideMarkup($lower, $bytePos))) {
                $out .= $value;
                if ($onlyone) {
                    $used[$usedKey] = true;
                }
            } else {
                $out .= $word;
            }

            $bytePos += strlen($word);
            $pos = $end + 1;
        }

        return $out;
    }
}




/**
 * Counts case-insensitive occurrences of $find in $value, including
 * overlapping ones (e.g. "aa" occurs twice in "aaa").
 *
 * @param string $value Text to search in.
 * @param string $find  Text to search for.
 * @return int Number of occurrences; 0 when $find is empty.
 */
function StringCount($value, $find)
{
    // Multibyte-aware folding, consistent with the mb_* calls below.
    $value = mb_strtolower($value);
    $find = mb_strtolower($find);

    $vlen = mb_strlen($value);
    $flen = mb_strlen($find);
    if ($flen === 0) {
        return 0;
    }

    $count = 0;
    for ($i = 0; $i <= $vlen - $flen; $i++) {
        // Strict comparison: "==" would treat numeric strings such as
        // "1.0" and "1e0" as equal.
        if (mb_substr($value, $i, $flen) === $find) {
            $count++;
        }
    }
    return $count;
}


/**
 * Splits a string into its UTF-8 characters.
 *
 * @param string $str String to split.
 * @return array List of single characters. An empty string yields [''];
 *               a string that is not valid UTF-8 is split into bytes.
 */
function strsplit($str)
{
    $str = (string) $str;
    if ($str === '') {
        // Kept for existing callers: an empty input has always produced [''].
        return [''];
    }

    // Splitting on the empty pattern separates every code point, including a
    // trailing newline (a "$"-based lookahead would keep it glued to the
    // preceding character).
    $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
        return str_split($str);
    }
    return $chars;
}

/**
 * Returns the current Unix time in milliseconds.
 *
 * @return float Whole number of milliseconds, as a float.
 */
function tmsectime()
{
    // microtime() yields "<fraction> <seconds>".
    $parts = explode(' ', microtime());
    $seconds = floatval($parts[0]) + floatval($parts[1]);
    $rounded = sprintf('%.0f', $seconds * 1000);

    return (float) $rounded;
}


/**
 * Efficient batch string replacement.
 *
 * Builds a TrieFilter from the keys of $d, stores it through cache() and runs
 * TrieFilter::Replace() on $text.
 *
 * @param string $text        Text to process.
 * @param array  $d           Replacement dictionary, e.g.
 *                            ['张三' => '李四', '王五' => '傻六'] replaces
 *                            张三 with 李四 and 王五 with 傻六.
 * @param bool   $onlyone     Replace each keyword only once.
 * @param bool   $excludehtml Forwarded to TrieFilter::Replace(): keywords that
 *                            sit inside an HTML tag or between <a and </a> are
 *                            left untouched. Mainly meant for text that already
 *                            contains links.
 * @return string
 */
function replace_batch($text, $d, $onlyone = true, $excludehtml = true)
{
    // Nothing to replace; also avoids caching a trie built from no keys.
    if (empty($d)) {
        return $text;
    }

    $cacheoption = [
        'type' => 'File',
        'path' => CACHE_PATH,
        'prefix' => '',
        'expire' => 0
    ];

    // The trie only depends on the dictionary keys, so derive the cache key
    // from them. With one fixed key, a trie built for one dictionary would be
    // reused for every other dictionary.
    $cachekey = 'tf_TrieFilter_' . md5(serialize(array_keys($d)));

    // NOTE(review): assumes cache($name, '', $options) reads and
    // cache($name, $value, $options) writes, as the original calls did — confirm.
    $tf = cache($cachekey, '', $cacheoption);

    if (!($tf instanceof TrieFilter)) {
        $tf = new TrieFilter(array_keys($d));
        cache($cachekey, $tf, $cacheoption);
    }

    return $tf->Replace($text, $d, $onlyone, $excludehtml);
}



#endregion
0

评论

博主关闭了所有页面的评论