skymvc开发手册之关键词库cls_keywords.php
4.23 skymvc的关键词库cls_keywords.php
$this->loadClass("keywords");
$this->keywords->setDir(ROOT_PATH."config/dict/");
//生成所有词库
$this->keywords->save_all();
//生成基础词库
$this->keywords->save_base();
//生成自定义词库
$this->keywords->save_my();
//根据基础库分词
$base=$this->keywords->get("豆豉辣椒火焙鱼我是中国人'吮指排骨','开背烤鸡'");
//根据自定义分词
$my=$this->keywords->set("my")->get("豆豉辣椒火焙鱼我是中国人'吮指排骨','开背烤鸡'");
//根据所有词库分词
$all=$this->keywords->set("all")->get("豆豉辣椒火焙鱼我是中国人'吮指排骨','开背烤鸡'");
print_r($my);
print_r($all);
print_r($base);
cls_keywords.php
<?php
/**
*提取关键字类
*dict 词典目录 base基础词典 my.txt用户自定义词典
*
* Copyright 雷日锦 362606856@qq.com
**/
class keywords{
public $run_time;//运行时间
public $dict;//词典内容
public $dir;//词典文件夹
public $base;
public $my;
public function __construct(){
}
public function setDir($dir="dict/"){
$this->dir=$dir?$dir:"dict/";
$this->set();
}
/**
*编译全部词典
*/
public function save_all(){
set_time_limit(0);
if(file_exists($this->dir."dict_base.php") && file_exists($this->dir."dict_my.php")){
$c1=filemtime($this->dir."dict_all.php");
$c2=filemtime($this->dir."base.txt") ;
$c3=filemtime($this->dir."my.txt") ;
if($c1>$c2 && $c1>$c3){
require($this->dir."dict_all.php");
return $this;
}
}
$this->save_my();
$my=$this->my;
$this->save_base();
$base=$this->base;
$my=$my."\r\n".$base;
$x=explode("\r\n",$my);
$temp=array();
foreach($x as $v){
if(!empty($v)){
$v=trim($v);
$temp[strlen($v)][]=$v;
}
}
$arr=array();
krsort($temp);
if($temp){
foreach($temp as $d){
foreach($d as $c){
$arr[]=$c;
}
}
}
$arr=array_unique($arr);
$data="";
$data.="<?php\r\n";
$data.='$dict_all=array('."\r\n";
foreach($arr as $k=>$v){
if($k==0){
$data.="'$v'";
}else{
$data.=",'$v'";
}
}
$data.="\r\n);\r\n?>";
file_put_contents($this->dir."dict_all.php",$data);
return $this;
}
/*
*编译基础词典
*/
public function save_base(){
set_time_limit(0);
if(file_exists($this->dir."dict_base.php")){
$c1=filemtime($this->dir."dict_base.php");
$c2=filemtime($this->dir."base.txt") ;
if($c1>$c2){
require($this->dir."dict_base.php");
$this->base=file_get_contents($this->dir."base.txt");
return $this;
}
}
$my=file_get_contents($this->dir."base.txt");
$x=explode("\r\n",$my);
$temp=array();
foreach($x as $v){
if(!empty($v)){
$v=trim($v);
$temp[strlen($v)][]=$v;
}
}
$arr=array();
krsort($temp);
if($temp){
foreach($temp as $d){
foreach($d as $c){
$arr[]=$c;
}
}
}
$arr=array_unique($arr);
$data="";
$data.="<?php\r\n";
$data.='$dict_base=array('."\r\n";
foreach($arr as $k=>$v){
if($k==0){
$data.="'$v'";
}else{
$data.=",'$v'";
}
}
$data.="\r\n);\r\n?>";
file_put_contents($this->dir."dict_base.php",$data);
$this->base=$my;
return $this;
}
/*
*编译用户自定义词典
*/
public function save_my(){
set_time_limit(0);
if(file_exists($this->dir."dict_my.php")){
$c1=filemtime($this->dir."dict_my.php");
$c2=filemtime($this->dir."my.txt") ;
if($c1>$c2){
require($this->dir."dict_my.php");
$this->my=file_get_contents($this->dir."my.txt");
return $this;
}
}
$my=file_get_contents($this->dir."my.txt");
$x=explode("\r\n",$my);
$temp=array();
foreach($x as $v){
if(!empty($v)){
$v=trim($v);
$temp[strlen($v)][]=$v;
}
}
$arr=array();
krsort($temp);
if($temp){
foreach($temp as $d){
foreach($d as $c){
$arr[]=$c;
}
}
}
$arr=array_unique($arr);
$data="";
$data.="<?php\r\n";
$data.='$dict_my=array('."\r\n";
foreach($arr as $k=>$v){
if($k==0){
$data.="'$v'";
}else{
$data.=",'$v'";
}
}
$data.="\r\n);\r\n?>";
file_put_contents($this->dir."dict_my.php",$data);
$this->my=$my;
return $this;
}
/*
*设置要使用的词典
*/
public function set($type='base'){
switch($type){
case "my":
if(file_exists($this->dir."dict_my.php")){
require($this->dir."dict_my.php");
$this->dict=$dict_my;
}
break;
case 'all':
if(file_exists($this->dir."dict_all.php")){
require($this->dir."dict_all.php");
$this->dict=$dict_all;
}
break;
default :
if(file_exists($this->dir."dict_base.php")){
require($this->dir."dict_base.php");
$this->dict=$dict_base;
}
break;
}
return $this;
}
/*
*获取关键词
*$str 文本内容
*$limit 关键词数量
*/
public function get($str,$limit=10000){
$str=strip_tags($str);
$start=microtime(true);
$i=0;
$temp=array();
if(empty($this->dict)) return false;
foreach($this->dict as $k=>$v){
if(!empty($v)){
if(strpos($str,$v)!==false){
$temp[]=$v;
$i++;
if($i>$limit) break;
}
}
}
$this->run_time=microtime(true)-$start;
return $temp;
}
}
?>