• PHP获取首字母相关方法


    项目中经常会遇到各种需求,本次遇到的是获取汉字拼音首字母的需求。

    在网上找了几份相关的方法,细数下来以下方法会稍微好一点。

    虽然同样有很多识别不出来,但是比起其他已经好很多

    第一个方法是我觉得用的比较好一些的

    它是封装的一个类库

    直接上代码

    <?php
    namespace app\index\controller;
     /**
    * Modified by fuyong @ 2015-09-13
    * 修复二分法查找方法
    * 汉字拼音首字母工具类
    *  注: 英文的字串:不变返回(包括数字)    eg .abc123 => abc123
    *      中文字符串:返回拼音首字符        eg. 测试字符串 => CSZFC
    *      中英混合串: 返回拼音首字符和英文   eg. 我i我j => WIWJ
    *  eg.
    *  $py = new Aafirstchar();
    *  $result = $py->getInitials('我想和你在一起');
    *  $result = $py->getFirstchar('小时候我就想和你在一起');
    */
    
    //下面3行测试代码
    // $py = new Aafirstchar();
    // $result = $py->getFirstchar('根据中文姓名');
    // print_r($result);
    
    /**
     * Pinyin-initial helper for Chinese text.
     *
     * Contract:
     *  - a pure ASCII string (letters, digits, punctuation) is returned unchanged,
     *    e.g. "abc123" => "abc123";
     *  - any string containing non-ASCII bytes is converted character by character:
     *    ASCII characters are kept, GB2312 level-1 hanzi become their pinyin initial,
     *    everything else is dropped, and the result is upper-cased,
     *    e.g. "测试字符串" => "CSZFC", "我i我j" => "WIWJ".
     *
     * Only the 3755 level-1 hanzi (0xB0A1-0xD7F9) are ordered by pinyin in GB2312,
     * so rarer characters cannot be resolved from their code and are skipped.
     */
    class Aafirstchar
    {
        /**
         * First code of every pinyin initial inside the GB2312 level-1 area,
         * in ascending order. A code is (lead byte * 1000 + trail byte),
         * e.g. 0xB0 0xA1 => 176161.
         */
        private $_pinyins = array(
            176161 => 'A',
            176197 => 'B',
            178193 => 'C',
            180238 => 'D',
            182234 => 'E',
            183162 => 'F',
            184193 => 'G',
            185254 => 'H',
            187247 => 'J',
            191166 => 'K',
            192172 => 'L',
            194232 => 'M',
            196195 => 'N',
            197182 => 'O',
            197190 => 'P',
            198218 => 'Q',
            200187 => 'R',
            200246 => 'S',
            203250 => 'T',
            205218 => 'W',
            206244 => 'X',
            209185 => 'Y',
            212209 => 'Z',
        );

        /** Code of the last level-1 hanzi (0xD7 0xF9); anything above is not pinyin-ordered. */
        private $_lastCode = 215249;

        /** Encoding of the strings handed to this object. */
        private $_charset = null;

        /**
         * @param string $charset Encoding of the input: 'utf-8' (default, compared
         *                        case-insensitively) or 'gb2312'. Anything other than
         *                        utf-8 is treated as already being GB2312/GBK bytes.
         */
        public function __construct( $charset = 'utf-8' )
        {
            $this->_charset = $charset;
        }

        /**
         * Tell whether a string consists of 7-bit ASCII bytes only.
         *
         * @param string $str
         * @return bool
         */
        private function _isPlainAscii( $str )
        {
            $len = strlen($str);
            for ( $i = 0; $i < $len; $i++ ) {
                if ( ord($str[$i]) >= 0x80 ) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Split a GB2312/GBK byte string into characters.
         *
         * @param string $str GB2312/GBK encoded bytes
         * @return array list of 1-byte (ASCII) or 2-byte (double-byte character) strings
         */
        private function _cutWord( $str )
        {
            $words = array();
            $len   = strlen($str);
            for ( $i = 0; $i < $len; $i++ ) {
                $byte = ord($str[$i]);
                if ( $byte < 0x80 ) {
                    $words[] = $str[$i];
                    continue;
                }
                // 0x81-0xFE starts a double-byte character; take the trail byte with it.
                if ( $byte >= 0x81 && $byte <= 0xFE && $i + 1 < $len ) {
                    $words[] = substr($str, $i, 2);
                    $i++;
                }
                // A stray 0x80/0xFF or a lead byte cut off at the end is dropped.
            }
            return $words;
        }

        /**
         * Find the pinyin initial of a level-1 hanzi code (binary search over
         * the start codes in $_pinyins).
         *
         * @param int $code lead byte * 1000 + trail byte
         * @return string|null the initial, or null when the code is outside the
         *                     pinyin-ordered level-1 area
         */
        private function _search( $code )
        {
            // Trail bytes below 0xA1 belong to the GBK extension, not to GB2312.
            if ( $code > $this->_lastCode || $code % 1000 < 161 ) {
                return null;
            }
            $starts = array_keys($this->_pinyins);
            $low    = 0;
            $high   = count($starts) - 1;
            $found  = -1;
            // Look for the last start code that is <= $code.
            while ( $low <= $high ) {
                $middle = ($low + $high) >> 1;
                if ( $starts[$middle] <= $code ) {
                    $found = $middle;
                    $low   = $middle + 1;
                } else {
                    $high  = $middle - 1;
                }
            }
            return $found < 0 ? null : $this->_pinyins[$starts[$found]];
        }

        /**
         * Get the pinyin initials of a string.
         *
         * @param string $str text in the charset given to the constructor
         * @return string the input itself when it is pure ASCII; otherwise the
         *                upper-cased initials/ASCII characters ('' when nothing
         *                could be resolved or the conversion failed)
         */
        public function getInitials( $str )
        {
            if ( $str === null || $str === false ) return '';
            $str = (string) $str;
            // Compare against '' explicitly: empty() would also swallow the string "0".
            if ( $str === '' ) return '';
            if ( $this->_isPlainAscii($str) ) {
                return $str;
            }
            if ( strcasecmp((string) $this->_charset, 'utf-8') === 0 ) {
                $str = iconv( 'utf-8', 'gbk//ignore', $str );
                if ( $str === false ) return '';
            }
            $result = '';
            foreach ( $this->_cutWord($str) as $word ) {
                if ( strlen($word) === 1 ) { /* ASCII: keep as is */
                    $result .= $word;
                    continue;
                }
                $initial = $this->_search( ord($word[0]) * 1000 + ord($word[1]) );
                if ( $initial !== null ) {
                    $result .= $initial;
                }
            }
            return strtoupper($result);
        }

        /**
         * Get only the first character of getInitials().
         *
         * @param string $str
         * @return string one character, or '' when there is none
         */
        public function getFirstchar( $str )
        {
            $initials = $this->getInitials($str);
            // Index instead of substr(): substr('', 0, 1) is false before PHP 8.
            return $initials === '' ? '' : $initials[0];
        }
    }
    ?>

    这个是我个人认为比其他的好一点的方法

    借鉴于   大大博客 地址:https://blog.csdn.net/liiuweii/article/details/52239508

    补充一下本人调用方法:

    #获取对应的首字母
    /**
     * Return the index letter for a nickname.
     *
     * @param string $nickname
     * @return string a single upper-case letter A-Z, or '#' when the first
     *                character does not resolve to one
     */
    function getUsersInit($nickname)
    {
        // The class lives in a namespace, so the name needs its separators.
        $py = new \app\index\controller\Aafirstchar();
        $res = strtoupper((string) $py->getFirstchar($nickname));

        // Reject an empty result here: strstr() with an empty needle returns the
        // whole haystack on PHP 8, which would slip through the check below.
        if ($res === '') {
            return '#';
        }

        $init = !empty(ifUsersInit($res)) ? $res : '#';
        return $init;
    }
    
    #根据首字母来进行对比是否在对应字段
    /**
     * Check whether $str occurs in the alphabet A-Z.
     *
     * @param string $str normally a single upper-case character
     * @return string|false the rest of the alphabet starting at $str,
     *                      or false when $str is empty or not found
     */
    function ifUsersInit($str)
    {
        $str = (string) $str;
        // An empty needle is a warning on PHP 7 and matches everything on PHP 8.
        if ($str === '') {
            return false;
        }
        return strstr('ABCDEFGHIJKLMNOPQRSTUVWXYZ', $str);
    }

    其他方法基本是使用ASCII码表来确定的,这种的获取不如上面的方法好

    我选择了两个比较有对比性的存放了一下下

    方法一

    /**
     * Return an index letter for the first character of a UTF-8 string.
     *
     *  - hanzi in the GB2312 level-1 area => pinyin initial (A-Z);
     *  - digit => initial of its Chinese reading (0 => L, 1 => Y, ... 9 => J);
     *  - ASCII letter => that letter in upper case;
     *  - any other ASCII character => the character itself.
     *
     * @param string $s0 UTF-8 text
     * @return string|null the letter/character, '' for empty input, or null when
     *                     the leading multi-byte character cannot be resolved
     */
    function getfirstchar($s0)
    {
        $s0 = (string) $s0;
        if ($s0 === '') {
            return '';
        }

        $head = $s0[0];
        $lead = ord($head);

        if ($lead >= 0x80) { // multi-byte character first
            // Convert only the first character, so that an unmappable character
            // later in the string cannot make the whole conversion fail.
            $width = $lead >= 0xF0 ? 4 : ($lead >= 0xE0 ? 3 : 2);
            $s = iconv('UTF-8', 'gb2312', substr($s0, 0, $width));
            if ($s === false || strlen($s) < 2) {
                return null;
            }
            $asc = ord($s[0]) * 256 + ord($s[1]) - 65536;
            // Level-1 hanzi span -20319 (0xB0A1) .. -10247 (0xD7F9).
            if ($asc < -20319 || $asc > -10247) {
                return null;
            }
            // First code of every initial; the ranges are contiguous.
            $starts = array(
                -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
                -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
                -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
                -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
                -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
                -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
            );
            $letter = null;
            foreach ($starts as $start => $initial) {
                if ($asc < $start) {
                    break;
                }
                $letter = $initial;
            }
            return $letter;
        }

        if ($lead >= 48 && $lead <= 57) { // digit first
            $digitInitials = array('L', 'Y', 'E', 'S', 'S', 'W', 'L', 'Q', 'B', 'J');
            return $digitInitials[$lead - 48];
        }

        if (($lead >= 65 && $lead <= 90) || ($lead >= 97 && $lead <= 122)) { // letter first
            return strtoupper($head);
        }

        // Any other ASCII character: hand it back as is.
        return $head;
    }

    本篇借鉴于  大大博客 地址:https://blog.csdn.net/PHP1923880282/article/details/8833192

    方法二

    //获取汉字的首字母
    /**
     * Return the pinyin initial of the first character of a UTF-8 string.
     *
     * ASCII letters are returned in upper case. Hanzi inside the GB2312 level-1
     * area are resolved from their code; everything else is handed to
     * rare_words().
     *
     * @param string $str UTF-8 text
     * @return string|null the initial, '' for empty input, or null when the
     *                     first character cannot be resolved
     */
    function getFirstCharters($str)
    {
      if (empty($str)) {
        return '';
      }
      $str = (string) $str;
      // First byte of the argument
      $temp_str = substr($str, 0, 1);
      $fchar = ord($temp_str);
      if ($fchar <= 127) {
        // Test both letter ranges: a single A..z range also accepts [ \ ] ^ _ and `.
        $isLetter = ($fchar >= 65 && $fchar <= 90) || ($fchar >= 97 && $fchar <= 122);
        return $isLetter ? strtoupper($temp_str) : null;
      }
      // Cut out exactly the first UTF-8 character (2, 3 or 4 bytes by lead byte).
      $width = $fchar >= 0xF0 ? 4 : ($fchar >= 0xE0 ? 3 : 2);
      $str = substr($str, 0, $width);
      $s1 = iconv('UTF-8', 'gb2312//IGNORE', $str);
      if (empty($s1)) {
        return null;
      }
      $s2 = iconv('gb2312', 'UTF-8', $s1);
      if (empty($s2)) {
        return null;
      }
      // Use the converted bytes only when the round trip reproduces the input.
      $s = $s2 == $str ? $s1 : $str;
      if (strlen($s) < 2) {
        return null;
      }
      $asc = ord($s[0]) * 256 + ord($s[1]) - 65536;
      // Level-1 hanzi span -20319 .. -10247; the per-letter ranges are contiguous.
      if ($asc >= -20319 && $asc <= -10247) {
        $starts = array(
          -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
          -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
          -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
          -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
          -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
          -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
        );
        $letter = null;
        foreach ($starts as $start => $initial) {
          if ($asc < $start) {
            break;
          }
          $letter = $initial;
        }
        return $letter;
      }
      return rare_words($asc);
    }
    //百家姓中的生僻字
    /**
     * Look up the pinyin initial of a rare surname character.
     *
     * @param int|string $asc character code computed as
     *                        (lead byte * 256 + trail byte - 65536)
     * @return string|null the initial, or null when the code is not listed
     */
    function rare_words($asc=''){
      $surnameTable = array(
        -3652  => array('word' => "窦", 'first_char' => 'D'),
        -8503  => array('word' => "奚", 'first_char' => 'X'),
        -9286  => array('word' => "酆", 'first_char' => 'F'),
        -7761  => array('word' => "岑", 'first_char' => 'C'),
        -5128  => array('word' => "滕", 'first_char' => 'T'),
        -9479  => array('word' => "邬", 'first_char' => 'W'),
        -5456  => array('word' => "臧", 'first_char' => 'Z'),
        -7223  => array('word' => "闵", 'first_char' => 'M'),
        -2877  => array('word' => "裘", 'first_char' => 'Q'),
        -6191  => array('word' => "缪", 'first_char' => 'M'),
        -5414  => array('word' => "贲", 'first_char' => 'B'),
        -4102  => array('word' => "嵇", 'first_char' => 'J'),
        -8969  => array('word' => "荀", 'first_char' => 'X'),
        -4938  => array('word' => "於", 'first_char' => 'Y'),
        -9017  => array('word' => "芮", 'first_char' => 'R'),
        -2848  => array('word' => "羿", 'first_char' => 'Y'),
        -9477  => array('word' => "邴", 'first_char' => 'B'),
        -9485  => array('word' => "隗", 'first_char' => 'K'),
        -6731  => array('word' => "宓", 'first_char' => 'M'),
        -9299  => array('word' => "郗", 'first_char' => 'X'),
        -5905  => array('word' => "栾", 'first_char' => 'L'),
        -4393  => array('word' => "钭", 'first_char' => 'T'),
        -9300  => array('word' => "郜", 'first_char' => 'G'),
        -8706  => array('word' => "蔺", 'first_char' => 'L'),
        -3613  => array('word' => "胥", 'first_char' => 'X'),
        -8777  => array('word' => "莘", 'first_char' => 'S'),
        -6708  => array('word' => "逄", 'first_char' => 'P'),
        -9302  => array('word' => "郦", 'first_char' => 'L'),
        -5965  => array('word' => "璩", 'first_char' => 'Q'),
        -6745  => array('word' => "濮", 'first_char' => 'P'),
        -4888  => array('word' => "扈", 'first_char' => 'H'),
        -9309  => array('word' => "郏", 'first_char' => 'J'),
        -5428  => array('word' => "晏", 'first_char' => 'Y'),
        -2849  => array('word' => "暨", 'first_char' => 'J'),
        -7206  => array('word' => "阙", 'first_char' => 'Q'),
        -4945  => array('word' => "殳", 'first_char' => 'S'),
        -9753  => array('word' => "夔", 'first_char' => 'K'),
        -10041 => array('word' => "厍", 'first_char' => 'S'),
        -5429  => array('word' => "晁", 'first_char' => 'C'),
        -2396  => array('word' => "訾", 'first_char' => 'Z'),
        -7205  => array('word' => "阚", 'first_char' => 'K'),
        -10049 => array('word' => "乜", 'first_char' => 'N'),
        -10015 => array('word' => "蒯", 'first_char' => 'K'),
        -3133  => array('word' => "竺", 'first_char' => 'Z'),
        -6698  => array('word' => "逯", 'first_char' => 'L'),
        -9799  => array('word' => "俟", 'first_char' => 'Q'),
        -6749  => array('word' => "澹", 'first_char' => 'T'),
        -7220  => array('word' => "闾", 'first_char' => 'L'),
        -10047 => array('word' => "亓", 'first_char' => 'Q'),
        -10005 => array('word' => "仉", 'first_char' => 'Z'),
        -3417  => array('word' => "颛", 'first_char' => 'Z'),
        -6431  => array('word' => "驷", 'first_char' => 'S'),
        -7226  => array('word' => "闫", 'first_char' => 'Y'),
        -9293  => array('word' => "鄢", 'first_char' => 'Y'),
        -6205  => array('word' => "缑", 'first_char' => 'G'),
        -9764  => array('word' => "佘", 'first_char' => 'S'),
        -9818  => array('word' => "佴", 'first_char' => 'N'),
        -9509  => array('word' => "谯", 'first_char' => 'Q'),
        -3122  => array('word' => "笪", 'first_char' => 'D'),
        -9823  => array('word' => "佟", 'first_char' => 'T'),
      );

      // Unknown code: nothing to report.
      if (!array_key_exists($asc, $surnameTable)) {
        return null;
      }

      $initial = $surnameTable[$asc]['first_char'];
      return $initial ? $initial : null;
    }
    // Test: the code of 窦 (-3652) is outside the level-1 ranges, so the answer
    // comes from the rare_words() table and this prints "D".
    echo getFirstCharters('窦');

    本篇借鉴于  学知无涯 大大博客 地址:https://www.cnblogs.com/gyfluck/p/8521259.html

    然后就是最基础版本的方法

    /**
     * Return the index letter for the first character of a string.
     *
     * ASCII letters are returned in upper case; hanzi inside the GB2312 level-1
     * area are mapped to their pinyin initial; anything else yields "#".
     *
     * @param string $str UTF-8 text (bytes that are not valid UTF-8 or cannot be
     *                    converted are treated as already being GB2312/GBK)
     * @return string '' for empty input, otherwise a letter A-Z or "#"
     */
    function Getzimu($str)
    {
        if (empty($str)) {
            return '';
        }
        $str = (string) $str;

        $head  = $str[0];
        $fchar = ord($head);

        // Test both letter ranges: a single A..z range also accepts [ \ ] ^ _ and `.
        if (($fchar >= 65 && $fchar <= 90) || ($fchar >= 97 && $fchar <= 122)) {
            return strtoupper($head);
        }
        if ($fchar < 0x80) {
            return '#';
        }

        // Convert only the first character, so that an unmappable character
        // later in the string cannot make the whole conversion fail.
        $width = $fchar >= 0xF0 ? 4 : ($fchar >= 0xE0 ? 3 : 2);
        $char  = substr($str, 0, $width);

        $s = false;
        if (preg_match('//u', $char) === 1) { // only hand valid UTF-8 to iconv
            $s = iconv('UTF-8', 'gb2312', $char);
        }
        if ($s === false) {
            // Not convertible: assume the caller already passed GB2312/GBK bytes.
            $s = $str;
        }
        if (strlen($s) < 2) {
            return '#';
        }

        $asc = ord($s[0]) * 256 + ord($s[1]) - 65536;

        // Level-1 hanzi span -20319 (0xB0A1) .. -10247 (0xD7F9).
        if ($asc < -20319 || $asc > -10247) {
            return '#';
        }

        // First code of every initial; the ranges are contiguous.
        $starts = array(
            -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
            -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
            -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
            -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
            -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
            -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
        );
        $letter = '#';
        foreach ($starts as $start => $initial) {
            if ($asc < $start) {
                break;
            }
            $letter = $initial;
        }

        return $letter;
    }

    本篇借鉴于 PEIZIJUN  大大博客  地址:https://www.jianshu.com/p/2ac2b9c369b3

    最后就是个人找到的最近出的版本,和上一种方法基本一致

    #获取中文首字母
    /**
     * Return the pinyin initial of the first character of a UTF-8 string.
     *
     * The ASCII-letter check sits in front of the high-byte test so that it can
     * actually be reached.
     *
     * @param string $str UTF-8 text
     * @return string|false|null upper-case letter for an ASCII letter or a GB2312
     *                           level-1 hanzi; false when the string does not start
     *                           with a letter or a convertible multi-byte character;
     *                           null when the character is outside the level-1 area
     */
    function Getzimu($str)
    {
        $str = (string) $str;
        if ($str === '') {
            return false;
        }

        $head  = $str[0];
        $fchar = ord($head);

        if (($fchar >= 65 && $fchar <= 90) || ($fchar >= 97 && $fchar <= 122)) {
            return strtoupper($head);
        }
        // Only a high lead byte can start a hanzi.
        if ($fchar < 0x80) {
            return false;
        }

        // Convert only the first character to GB2312, so that an unmappable
        // character later in the string cannot make the conversion fail.
        // (If the application already works in GBK, this conversion has to be skipped.)
        $width = $fchar >= 0xF0 ? 4 : ($fchar >= 0xE0 ? 3 : 2);
        $a = iconv("UTF-8", "gb2312", substr($str, 0, $width));
        if ($a === false || strlen($a) < 2) {
            return false;
        }

        $val = ord($a[0]) * 256 + ord($a[1]) - 65536;

        // Level-1 hanzi span -20319 (0xB0A1) .. -10247 (0xD7F9).
        if ($val < -20319 || $val > -10247) {
            return null;
        }

        // First code of every initial; the ranges are contiguous.
        $starts = array(
            -20319 => "A", -20283 => "B", -19775 => "C", -19218 => "D",
            -18710 => "E", -18526 => "F", -18239 => "G", -17922 => "H",
            -17417 => "J", -16474 => "K", -16212 => "L", -15640 => "M",
            -15165 => "N", -14922 => "O", -14914 => "P", -14630 => "Q",
            -14149 => "R", -14090 => "S", -13318 => "T", -12838 => "W",
            -12556 => "X", -11847 => "Y", -11055 => "Z",
        );
        $letter = null;
        foreach ($starts as $start => $initial) {
            if ($val < $start) {
                break;
            }
            $letter = $initial;
        }

        return $letter;
    }

    以上就是本人找到的获取首字母的相关方法,希望对您有帮助

    2020年07月01日

     

  • 相关阅读:
    36-图像有用区(dfs, bfs)
    35-迷宫寻宝(一)-NYOJ82
    34- 24 Point game
    32-回文字符串(dp)
    71-n皇后
    70-合并数字
    2017.11.18 C语言的算法分析题目
    2017.11.17 C++系列---用malloc动态给c++二维数组的申请与释放操作
    2017.11.16 JavaWeb-------第八章 EL、JSTL、Ajax技术
    2017.11.15 JavaWeb的学生体质管理系统
  • 原文地址:https://www.cnblogs.com/YFYQ/p/13220526.html
Copyright © 2020-2023  润新知