• PHP违禁词敏感词 全站文件扫描


    全站违禁词扫描下载地址

    https://files.cnblogs.com/files/kingchou/%E8%BF%9D%E7%A6%81%E8%AF%8D%E7%B1%BB%E6%96%87%E4%BB%B6.rar

    全站扫描违禁词的调用示例代码:

           require_once ROOT_PATH . '/Banned.php';

           // Configure the scanner up front: echo every hit as HTML, append it to
           // the CSV log, and run in "file" mode. The constructor merges this array
           // into the default configuration, which is the same state the three
           // property assignments used to produce.
           $banned = new Banned(array(
               'write_html' => true,
               'write_log'  => true,
               'check_type' => 'file',
           ));

           // Walk the configured path (the document root by default) and check the files.
           $banned->checkFileAll();
    

      

    类文件(Banned.php):

    <?php
    // Pin the default timezone so the date("YmdHis") timestamp that names each
    // run's log directory is produced in China Standard Time.
    date_default_timezone_set('Asia/Shanghai');
    /*error_reporting(E_WARNING);*/
    /**
     * Banned-word / sensitive-word scanner.
     *
     * Loads two word lists from <document_root>/words/ (forbidden.txt for sensitive
     * words, banned.txt for banned words), compiles each list into one
     * case-insensitive regular expression, and checks plain text or .php/.html
     * files against them. Hits can be echoed as HTML and/or appended to a CSV log
     * under <document_root>/logs/file<timestamp>/.
     *
     * Created by PhpStorm.
     * Author: zhouzj (周宗君)
     * Date: 2017/11/17 15:52
     */
    class Banned
    {
        private $data;                 // data (not referenced by any method of this class)
        private $match_banned;         // regular expression built from the banned-word list
        private $match_mingan;         // regular expression built from the sensitive-word list
        private $match_field;          // field-type rule (not referenced by any method of this class)
        private $finish_path=array();  // paths treated as already checked by __check_file();
                                       // NOTE(review): nothing in this class ever adds to it
        private $finish_table=array(); // finished tables (not referenced by any method of this class)
        private $logtime;              // "YmdHis" timestamp that names this run's log directory

        // The private properties below share their names with keys of $_config.
        // Being private, any access from outside the class is routed through
        // __get()/__set() and therefore reads or writes $_config instead.
        private  $path;
        private  $check_sub_dir;
        private  $check_type;
        private  $banned_from;
        private  $clean;
        private  $write_html;
        private  $write_log;
        private  $fornum;
        private  $bannedword;

        public   $document_root;       // base directory for the word lists, logs and default scan path

        /**
         * Runtime configuration.
         *
         * - path:          directory to scan; empty means $document_root
         * - check_sub_dir: not consulted by the current code
         * - check_type:    only 'file' is handled by checkAll()/check()
         * - banned_from:   not consulted by the current code
         * - clean:         not consulted by the current code
         * - fornum:        upper bound on match iterations per text in check_words()
         * - write_html:    echo each hit as HTML
         * - write_log:     append each hit to the CSV log
         * - bannedword:    whether the banned-word list is loaded at all
         */
        public $_config=array(
            "path"=>'',
            "check_sub_dir"=>array('web'),
            'check_type'=>'file',
            'banned_from'=>'table',
            'clean'=>false,
            'fornum'=>20,
            'write_html'=>false,
            'write_log'=>false,
            'bannedword'=>true,);

        /**
         * Create the scanner and load both word lists.
         *
         * @param array $config Optional overrides merged over the defaults in $_config.
         */
        public function __construct($config=array())
        {
            if (!empty($config)) {
                $this->_config = array_merge($this->_config, $config);
            }
            $this->BannedInit();
        }

        /**
         * Store a configuration value (reached when an inaccessible property is written).
         *
         * @param string $key
         * @param mixed  $value
         */
        public function __set($key, $value)
        {
            $this->_config[$key] = $value;
        }

        /**
         * Read a configuration value (reached when an inaccessible property is read).
         *
         * @param string $key
         * @return mixed The configured value, or null when the key is not set.
         */
        public function __get($key)
        {
            // Unknown keys yield null instead of raising an "undefined index" notice.
            return isset($this->_config[$key]) ? $this->_config[$key] : null;
        }

        /**
         * Initialise the run timestamp, the document root and both word patterns.
         */
        private function BannedInit(){
            $this->logtime = date("YmdHis");
            $this->document_root = isset($_SERVER['DOCUMENT_ROOT']) ? $_SERVER['DOCUMENT_ROOT'] : '';

            // Sensitive-word pattern.
            $this->match_mingan = $this->generateRegularExpression($this->getMinganWords());

            // Banned-word pattern.
            $this->match_banned = $this->generateRegularExpression($this->getBannedWords());
        }

        /**
         * Run the check selected by the 'check_type' setting.
         *
         * @return bool|null Result of checkFileAll() for type "file", otherwise null.
         */
        public function checkAll(){
            if ($this->_config['check_type'] == "file") {
                return $this->checkFileAll();
            }
        }

        /**
         * Check every .php/.html file below the configured path.
         *
         * The scan starts at the 'path' setting, or at $document_root when 'path'
         * is empty. Files located directly in the start directory are checked as
         * well as those in all nested directories (previously only the
         * sub-directories were walked, so top-level files were never checked).
         *
         * @return bool Always true.
         */
        public function checkFileAll(){
            $path = $this->_config['path'];
            if (!$path) {
                $path = $this->document_root."";
            }
            if (substr($path, -1) != "/") {
                $path .= "/";
            }
            $this->check_sub_dir($path);
            return true;
        }

        /**
         * Check a single item according to the 'check_type' setting.
         *
         * @param string $content For type "file": the path of the file to check.
         * @return array|null Result of check_file() for type "file", otherwise null.
         */
        public function check($content){
            if ($this->_config['check_type'] == "file") {
                return $this->check_file($content);
            }
        }

        /**
         * Check one piece of text.
         *
         * @param string $content Text to check.
         * @return array ['mingan' => sensitive words found, 'banned' => banned words found]
         */
        public function check_text($content){
            $res_mingan = array();
            $res_banned = array();
            $this->__check_content($content, $res_mingan, $res_banned);
            return array(
                'mingan' => $res_mingan,
                'banned' => $res_banned,
            );
        }

        /**
         * Check one file and append its path to this run's finished-path log.
         *
         * @param string $content Path of the file to check.
         * @return array|null Result of __check_file().
         */
        public function check_file($content){
            $res = $this->__check_file($content);
            $this->set_finish_path($content);
            return $res;
        }

        /**
         * Check the files in a directory, then recurse into its sub-directories.
         *
         * @param string $path Directory path; a trailing "/" is added when missing.
         */
        public function check_sub_dir($path){
            if (substr($path, -1) != "/") {
                $path .= "/";
            }
            foreach ($this->getFile($path) as $file) {
                $this->__check_file($file);
            }
            foreach ($this->getDir($path) as $name) {
                $this->check_sub_dir($path.$name.'/');
            }
        }

        /**
         * Check a single file and report any hits through write_log()/write_html().
         *
         * @param string $filepath Path of the file.
         * @return array|null Null when the path is empty, listed in $finish_path,
         *                    missing, or names a word-list file; otherwise an array
         *                    that is empty when nothing was found or holds the keys
         *                    'mingan' and 'banned'.
         */
        public function __check_file($filepath){
            if (!$filepath) return;

            // Skip paths recorded as already checked.
            if (in_array($filepath, $this->finish_path))
                return;

            if (!file_exists($filepath)) return;

            // Word-list files would trivially match themselves, so never scan them.
            if (stripos($filepath, "mingan_words.txt") !== false || stripos($filepath, "banned_words.txt") !== false) {
                return;
            }

            $data = array();
            if (!is_file($filepath) || !is_readable($filepath)) {
                return $data;
            }
            $content = file_get_contents($filepath);
            if ($content === false) {
                return $data;
            }

            $res_mingan = array();
            $res_banned = array();
            $this->__check_content($content, $res_mingan, $res_banned);
            if ($res_mingan || $res_banned) {
                $this->write_log($filepath, $res_mingan, $res_banned);
                $this->write_html($filepath, $res_mingan, $res_banned);
                $data['mingan'] = $res_mingan;
                $data['banned'] = $res_banned;
            }
            return $data;
        }

        /**
         * Run both word patterns against a text.
         *
         * @param string $content    Text to check.
         * @param array  $res_mingan Receives the sensitive words found.
         * @param array  $res_banned Receives the banned words found.
         */
        private function __check_content($content,&$res_mingan,&$res_banned){
            $res_mingan = $this->check_words($this->match_mingan, $content);
            $res_banned = $this->check_words($this->match_banned, $content);
        }

        /**
         * Collect the distinct listed words that occur in a text.
         *
         * The pattern is matched repeatedly; after each hit the matched word is
         * taken out of the alternation so the next pass finds a different word.
         * At most 'fornum' + 1 passes are made.
         *
         * @param string $banned Pattern produced by generateRegularExpression().
         * @param string $string Text to search.
         * @return array Matched words as they appear in the text, in discovery
         *               order, without case-insensitive duplicates.
         */
        private function check_words($banned,$string)
        {
            $found = array();
            $subject = (string)$string;
            if (empty($banned) || $subject === '') {
                return $found;
            }

            $pattern = strtolower($banned);
            $alternatives = null;   // parsed lazily: most texts contain no hit at all
            $seen = array();
            $limit = (int)$this->_config['fornum'];

            for ($i = 0; $i <= $limit; $i++) {
                $matches = array();
                if (!preg_match($pattern, $subject, $matches) || $matches[0] === '') {
                    break;
                }
                $hit = $matches[0];
                $key = strtolower($hit);
                if (!isset($seen[$key])) {
                    $seen[$key] = true;
                    $found[] = $hit;
                }

                if ($alternatives === null) {
                    $alternatives = $this->splitAlternatives($pattern);
                }
                $index = array_search(strtolower($this->generateRegularExpressionString($hit)), $alternatives, true);
                if ($index === false) {
                    // The hit cannot be located in the pattern; stop rather than
                    // report the same word again on every remaining pass.
                    break;
                }
                unset($alternatives[$index]);
                if (!$alternatives) {
                    break;   // that was the last listed word
                }
                $pattern = '/'.implode('|', $alternatives).'/i';
            }
            return $found;
        }

        /**
         * Split a "/a|b|c/i" pattern into its quoted alternatives.
         *
         * Escaped characters (including "\|" and "\/") stay inside their
         * alternative; only unescaped "|" separates entries.
         *
         * @param string $pattern Pattern in the form produced by generateRegularExpression().
         * @return array List of alternatives, still regex-quoted.
         */
        private function splitAlternatives($pattern)
        {
            $body = substr($pattern, 1, strrpos($pattern, '/') - 1);
            $parts = array();
            if (preg_match_all('/(?:\\\\.|[^|\\\\])+/s', $body, $parts)) {
                return $parts[0];
            }
            return array();
        }

        /**
         * Build one case-insensitive pattern that matches any of the given words.
         *
         * Blank entries are dropped: an empty alternative would match the empty
         * string at offset 0 and hide every real hit. Words are quoted with the
         * "/" delimiter so a word containing "/" cannot terminate the pattern.
         *
         * @param array $words
         * @return string Pattern of the form "/w1|w2|.../i"; a pattern that can
         *                never match when no usable word is given.
         */
        private function generateRegularExpression($words)
        {
            $quoted = array();
            foreach ((array)$words as $word) {
                $word = trim((string)$word);
                if ($word === '') {
                    continue;
                }
                // Keyed by the quoted text so repeated words collapse to one entry.
                $quoted[$this->generateRegularExpressionString($word)] = true;
            }
            if (!$quoted) {
                return '/(?!)/i';
            }
            $regular = implode('|', array_keys($quoted));
            return "/$regular/i";
        }

        /**
         * Quote a single string for literal use inside a "/"-delimited pattern.
         *
         * @param string $string
         * @return string
         */
        private function generateRegularExpressionString($string){
            return preg_quote($string, '/');
        }

        /**
         * Append one hit line to this run's CSV log (only when 'write_log' is on).
         *
         * Line layout: location, sensitive count, sensitive words, banned count,
         * banned words. The file is written in GB2312.
         *
         * @param string $location    Path (or other locator) of the checked item.
         * @param array  $contentarr  Sensitive words found.
         * @param array  $weijinciarr Banned words found.
         */
        private function write_log($location,$contentarr,$weijinciarr){
            if (empty($this->_config['write_log'])) {
                return;
            }
            if (!$contentarr && !$weijinciarr) {
                return;
            }

            $content = $location
                .$this->formatLogColumns($contentarr)
                .$this->formatLogColumns($weijinciarr)
                ."\n";

            $file = $this->document_root."/logs/file".$this->logtime."/file_bannwords.csv";
            if (!file_exists($file)) {
                if (!$this->ensureDirectory(dirname($file))) {
                    return;
                }
                // First write of this run: start the file with the column titles.
                $content_title = "位置,敏感词数量,敏感词,违禁词数量,违禁词"."\n";
                error_log($this->convertEncoding($content_title, 'UTF-8', 'GB2312'), 3, $file);
            }
            error_log($this->convertEncoding($content, 'UTF-8', 'GB2312'), 3, $file);
        }

        /**
         * Format the ",<count>,<w1|w2|...>" column pair for one word list.
         *
         * @param array $words
         * @return string ",," when the list is empty.
         */
        private function formatLogColumns($words)
        {
            if (count($words) > 0) {
                return ",".count($words).",".implode('|', $words);
            }
            return ",,";
        }

        /**
         * Echo one hit as an HTML line (only when 'write_html' is on).
         *
         * The location and the matched words are HTML-escaped before output.
         *
         * @param string $location   Path of the checked item.
         * @param array  $res_mingan Sensitive words found.
         * @param array  $res_banned Banned words found.
         */
        private function write_html($location,$res_mingan,$res_banned){
            if (empty($this->_config['write_html'])) {
                return;
            }
            echo $this->escapeHtml($this->convertEncoding($location, 'GB2312', 'UTF-8'));
            if ($res_mingan) {
                echo "  <font color='red'>敏感词(".count($res_mingan)."):</font>"
                    .implode('|', array_map(array($this, 'escapeHtml'), $res_mingan));
            }
            if ($res_banned) {
                echo "  <font color='red'>违禁词(".count($res_banned)."):</font>"
                    .implode('|', array_map(array($this, 'escapeHtml'), $res_banned));
            }
            echo "<br>";
        }

        /**
         * Escape text for HTML output; invalid byte sequences are substituted
         * instead of blanking the whole string.
         *
         * @param string $text
         * @return string
         */
        private function escapeHtml($text)
        {
            return htmlspecialchars((string)$text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }

        /**
         * Append a path to this run's banned_finish_path.txt.
         *
         * @param string $path
         */
        private function set_finish_path($path){
            if (!$path) {
                return;
            }
            $content = $path."\n";
            $file = $this->document_root."/logs/file".$this->logtime."/banned_finish_path.txt";
            // The directory may already exist (write_log() uses the same one), so
            // test the directory itself rather than the file before creating it.
            if (!$this->ensureDirectory(dirname($file))) {
                return;
            }
            error_log($this->convertEncoding($content, 'GB2312', 'UTF-8'), 3, $file);
        }

        /**
         * Convert text between encodings, keeping the original text when the
         * conversion is unavailable or fails, so a line is never lost.
         *
         * @param string $text
         * @param string $from Source encoding name.
         * @param string $to   Target encoding name.
         * @return string
         */
        private function convertEncoding($text, $from, $to)
        {
            if (!function_exists('iconv')) {
                return $text;
            }
            // iconv() raises a notice and returns false on bytes it cannot map;
            // the failure is handled by the fallback below.
            $converted = @iconv($from, $to, $text);
            return $converted === false ? $text : $converted;
        }

        /**
         * Create a directory (recursively) unless it already exists.
         *
         * @param string $dir
         * @return bool True when the directory exists afterwards.
         */
        private function ensureDirectory($dir)
        {
            if (is_dir($dir)) {
                return true;
            }
            if (@mkdir($dir, 0775, true) || is_dir($dir)) {
                return true;
            }
            trigger_error("Banned: cannot create log directory ".$dir, E_USER_WARNING);
            return false;
        }

        /**
         * List the names of the sub-directories of a directory.
         *
         * Entries whose name starts with "." (".", ".." and hidden directories)
         * are skipped. Whether an entry is a directory is decided with is_dir()
         * rather than by the absence of a dot in its name.
         *
         * @param string $dir Directory path.
         * @return array Sub-directory names (not full paths); empty when none.
         */
        private function getDir($dir) {
            $names = array();
            if (!is_dir($dir)) {
                return $names;
            }
            // Unreadable directories are skipped silently, as a best-effort scan.
            $handle = @opendir($dir);
            if ($handle === false) {
                return $names;
            }
            $base = rtrim($dir, '/').'/';
            while (false !== ($entry = readdir($handle))) {
                if ($entry === '' || $entry[0] === '.') {
                    continue;
                }
                if (is_dir($base.$entry)) {
                    $names[] = $entry;
                }
            }
            closedir($handle);
            return $names;
        }

        /**
         * List the files in a directory whose name contains ".php" or ".html".
         *
         * @param string $dir Directory path, expected to end with "/".
         * @return array Paths ($dir . name); collection stops once the entry at
         *               index 1000 has been stored.
         */
        private function getFile($dir) {
            $files = array();
            // Unreadable directories are skipped silently, as a best-effort scan.
            $handle = @opendir($dir);
            if ($handle === false) {
                return $files;
            }
            while (false !== ($entry = readdir($handle))) {
                if ($entry == "." || $entry == "..") {
                    continue;
                }
                if (strpos($entry, ".php") === false && strpos($entry, ".html") === false) {
                    continue;
                }
                $files[] = $dir.$entry;
                if (count($files) > 1000) {   // cap the work done for a single directory
                    break;
                }
            }
            closedir($handle);
            return $files;
        }

        /**
         * Load the sensitive-word list from <document_root>/words/forbidden.txt.
         *
         * @return array One word per entry; empty when the file cannot be read.
         */
        private function getMinganWords(){
            return $this->loadWordList($this->document_root."/words/forbidden.txt");
        }

        /**
         * Load the banned-word list from <document_root>/words/banned.txt.
         *
         * @return array One word per entry; empty when the 'bannedword' setting
         *               is off or the file cannot be read.
         */
        private function getBannedWords(){
            if (empty($this->_config['bannedword'])) {
                return array();
            }
            return $this->loadWordList($this->document_root."/words/banned.txt");
        }

        /**
         * Read a one-word-per-line file.
         *
         * Accepts LF, CRLF and CR line endings, strips a leading UTF-8 byte-order
         * mark, trims each line and drops blank ones.
         *
         * @param string $file
         * @return array
         */
        private function loadWordList($file)
        {
            $words = array();
            if (!is_file($file) || !is_readable($file)) {
                return $words;
            }
            $text = file_get_contents($file);
            if ($text === false) {
                return $words;
            }
            if (substr($text, 0, 3) === "\xEF\xBB\xBF") {
                $text = substr($text, 3);
            }
            foreach (preg_split('/\r\n|\r|\n/', $text) as $line) {
                $line = trim($line);
                if ($line !== '') {
                    $words[] = $line;
                }
            }
            return $words;
        }

    }
    

      

  • 相关阅读:
    【题解】洛谷P5048 Yuno loves sqrt technology III
    【题解】Codeforces1545D AquaMoon and Wrong Coordinate
    Linux压缩解压命令汇总
    Linux设置DNS地址及清理DNS缓存方法
    Linux设置系统运行模式
    JDBC 1.0
    iBATIS 历史三个版本小细节对比
    Oracle import/Export 命令
    初创互联网团队如何利用开源技术和云服务快速建立网站
    oracle Merge 函数
  • 原文地址:https://www.cnblogs.com/kingchou/p/7943304.html
Copyright © 2020-2023  润新知