php根据汉字获取拼音(php基于拼音搜索实现原理)
代码一:获取字符串汉字首字母,兼容GBK和UTF-8
<?php function getfirstchar( $s0 ){ //获取单个汉字拼音首字母。注意:此处不要纠结。汉字拼音是没有以U和V开头的 $fchar = ord( $s0 {0}); if ( $fchar >= ord( "A" ) and $fchar <= ord( "z" ) ) return strtoupper ( $s0 {0}); $s1 = iconv( "UTF-8" , "gb2312" , $s0 ); $s2 = iconv( "gb2312" , "UTF-8" , $s1 ); if ( $s2 == $s0 ){ $s = $s1 ;} else { $s = $s0 ;} $asc = ord( $s {0}) * 256 + ord( $s {1}) - 65536; if ( $asc >= -20319 and $asc <= -20284) return "A" ; if ( $asc >= -20283 and $asc <= -19776) return "B" ; if ( $asc >= -19775 and $asc <= -19219) return "C" ; if ( $asc >= -19218 and $asc <= -18711) return "D" ; if ( $asc >= -18710 and $asc <= -18527) return "E" ; if ( $asc >= -18526 and $asc <= -18240) return "F" ; if ( $asc >= -18239 and $asc <= -17923) return "G" ; if ( $asc >= -17922 and $asc <= -17418) return "H" ; if ( $asc >= -17922 and $asc <= -17418) return "I" ; if ( $asc >= -17417 and $asc <= -16475) return "J" ; if ( $asc >= -16474 and $asc <= -16213) return "K" ; if ( $asc >= -16212 and $asc <= -15641) return "L" ; if ( $asc >= -15640 and $asc <= -15166) return "M" ; if ( $asc >= -15165 and $asc <= -14923) return "N" ; if ( $asc >= -14922 and $asc <= -14915) return "O" ; if ( $asc >= -14914 and $asc <= -14631) return "P" ; if ( $asc >= -14630 and $asc <= -14150) return "Q" ; if ( $asc >= -14149 and $asc <= -14091) return "R" ; if ( $asc >= -14090 and $asc <= -13319) return "S" ; if ( $asc >= -13318 and $asc <= -12839) return "T" ; if ( $asc >= -12838 and $asc <= -12557) return "W" ; if ( $asc >= -12556 and $asc <= -11848) return "X" ; if ( $asc >= -11847 and $asc <= -11056) return "Y" ; if ( $asc >= -11055 and $asc <= -10247) return "Z" ; return NULL; //return $s0; } function pinyin_long( $zh ){ //获取整条字符串汉字拼音首字母 $ret = "" ; $s1 = iconv( "UTF-8" , "gb2312" , $zh ); $s2 = iconv( "gb2312" , "UTF-8" , $s1 ); if ( $s2 == $zh ){ $zh = $s1 ;} for ( $i = 0; $i < strlen ( $zh ); $i ++){ $s1 = substr ( $zh , $i ,1); $p = ord( $s1 ); if ( $p > 160){ $s2 = substr ( $zh , $i ++,2); $ret .= getfirstchar( $s2 ); } else { $ret .= $s1 ; } } return $ret ; } echo pinyin_long( '*《,@#$123HAHadf一年后' ); ?> |
输出:
代码二:获取字符串全拼,仅支持GBK
<?php class ChineseSpell { //注意仅支持:GBK,故传入参数前须转码 /** * @var array $chineseSpellList 拼音编码对应表 * @access private */ private $chineseSpellList = array ( 'a' =>-20319, 'ai' =>-20317, 'an' =>-20304, 'ang' =>-20295, 'ao' =>-20292, 'ba' =>-20283, 'bai' =>-20265, 'ban' =>-20257, 'bang' =>-20242, 'bao' =>-20230, 'bei' =>-20051, 'ben' =>-20036, 'beng' =>-20032, 'bi' =>-20026, 'bian' =>-20002, 'biao' =>-19990, 'bie' =>-19986, 'bin' =>-19982, 'bing' =>-19976, 'bo' =>-19805, 'bu' =>-19784, 'ca' =>-19775, 'cai' =>-19774, 'can' =>-19763, 'cang' =>-19756, 'cao' =>-19751, 'ce' =>-19746, 'ceng' =>-19741, 'cha' =>-19739, 'chai' =>-19728, 'chan' =>-19725, 'chang' =>-19715, 'chao' =>-19540, 'che' =>-19531, 'chen' =>-19525, 'cheng' =>-19515, 'chi' =>-19500, 'chong' =>-19484, 'chou' =>-19479, 'chu' =>-19467, 'chuai' =>-19289, 'chuan' =>-19288, 'chuang' =>-19281, 'chui' =>-19275, 'chun' =>-19270, 'chuo' =>-19263, 'ci' =>-19261, 'cong' =>-19249, 'cou' =>-19243, 'cu' =>-19242, 'cuan' =>-19238, 'cui' =>-19235, 'cun' =>-19227, 'cuo' =>-19224, 'da' =>-19218, 'dai' =>-19212, 'dan' =>-19038, 'dang' =>-19023, 'dao' =>-19018, 'de' =>-19006, 'deng' =>-19003, 'di' =>-18996, 'dian' =>-18977, 'diao' =>-18961, 'die' =>-18952, 'ding' =>-18783, 'diu' =>-18774, 'dong' =>-18773, 'dou' =>-18763, 'du' =>-18756, 'duan' =>-18741, 'dui' =>-18735, 'dun' =>-18731, 'duo' =>-18722, 'e' =>-18710, 'en' =>-18697, 'er' =>-18696, 'fa' =>-18526, 'fan' =>-18518, 'fang' =>-18501, 'fei' =>-18490, 'fen' =>-18478, 'feng' =>-18463, 'fo' =>-18448, 'fou' =>-18447, 'fu' =>-18446, 'ga' =>-18239, 'gai' =>-18237, 'gan' =>-18231, 'gang' =>-18220, 'gao' =>-18211, 'ge' =>-18201, 'gei' =>-18184, 'gen' =>-18183, 'geng' =>-18181, 'gong' =>-18012, 'gou' =>-17997, 'gu' =>-17988, 'gua' =>-17970, 'guai' =>-17964, 'guan' =>-17961, 'guang' =>-17950, 'gui' =>-17947, 'gun' =>-17931, 'guo' =>-17928, 'ha' =>-17922, 'hai' =>-17759, 'han' =>-17752, 'hang' =>-17733, 'hao' =>-17730, 'he' =>-17721, 'hei' =>-17703, 'hen' =>-17701, 'heng' =>-17697, 'hong' =>-17692, 'hou' =>-17683, 'hu' =>-17676, 'hua' =>-17496, 'huai' =>-17487, 'huan' =>-17482, 'huang' =>-17468, 'hui' =>-17454, 'hun' =>-17433, 'huo' =>-17427, 'ji' =>-17417, 'jia' =>-17202, 'jian' =>-17185, 'jiang' =>-16983, 'jiao' =>-16970, 'jie' =>-16942, 'jin' =>-16915, 'jing' =>-16733, 'jiong' =>-16708, 'jiu' =>-16706, 'ju' =>-16689, 'juan' =>-16664, 'jue' =>-16657, 'jun' =>-16647, 'ka' =>-16474, 'kai' =>-16470, 'kan' =>-16465, 'kang' =>-16459, 'kao' =>-16452, 'ke' =>-16448, 'ken' =>-16433, 'keng' =>-16429, 'kong' =>-16427, 'kou' =>-16423, 'ku' =>-16419, 'kua' =>-16412, 'kuai' =>-16407, 'kuan' =>-16403, 'kuang' =>-16401, 'kui' =>-16393, 'kun' =>-16220, 'kuo' =>-16216, 'la' =>-16212, 'lai' =>-16205, 'lan' =>-16202, 'lang' =>-16187, 'lao' =>-16180, 'le' =>-16171, 'lei' =>-16169, 'leng' =>-16158, 'li' =>-16155, 'lia' =>-15959, 'lian' =>-15958, 'liang' =>-15944, 'liao' =>-15933, 'lie' =>-15920, 'lin' =>-15915, 'ling' =>-15903, 'liu' =>-15889, 'long' =>-15878, 'lou' =>-15707, 'lu' =>-15701, 'lv' =>-15681, 'luan' =>-15667, 'lue' =>-15661, 'lun' =>-15659, 'luo' =>-15652, 'ma' =>-15640, 'mai' =>-15631, 'man' =>-15625, 'mang' =>-15454, 'mao' =>-15448, 'me' =>-15436, 'mei' =>-15435, 'men' =>-15419, 'meng' =>-15416, 'mi' =>-15408, 'mian' =>-15394, 'miao' =>-15385, 'mie' =>-15377, 'min' =>-15375, 'ming' =>-15369, 'miu' =>-15363, 'mo' =>-15362, 'mou' =>-15183, 'mu' =>-15180, 'na' =>-15165, 'nai' =>-15158, 'nan' =>-15153, 'nang' =>-15150, 'nao' =>-15149, 'ne' =>-15144, 'nei' =>-15143, 'nen' =>-15141, 'neng' =>-15140, 'ni' =>-15139, 'nian' =>-15128, 'niang' =>-15121, 'niao' =>-15119, 'nie' =>-15117, 'nin' =>-15110, 'ning' =>-15109, 'niu' =>-14941, 'nong' =>-14937, 'nu' =>-14933, 'nv' =>-14930, 'nuan' =>-14929, 'nue' =>-14928, 'nuo' =>-14926, 'o' =>-14922, 'ou' =>-14921, 'pa' =>-14914, 'pai' =>-14908, 'pan' =>-14902, 'pang' =>-14894, 'pao' =>-14889, 'pei' =>-14882, 'pen' =>-14873, 'peng' =>-14871, 'pi' =>-14857, 'pian' =>-14678, 'piao' =>-14674, 'pie' =>-14670, 'pin' =>-14668, 'ping' =>-14663, 'po' =>-14654, 'pu' =>-14645, 'qi' =>-14630, 'qia' =>-14594, 'qian' =>-14429, 'qiang' =>-14407, 'qiao' =>-14399, 'qie' =>-14384, 'qin' =>-14379, 'qing' =>-14368, 'qiong' =>-14355, 'qiu' =>-14353, 'qu' =>-14345, 'quan' =>-14170, 'que' =>-14159, 'qun' =>-14151, 'ran' =>-14149, 'rang' =>-14145, 'rao' =>-14140, 're' =>-14137, 'ren' =>-14135, 'reng' =>-14125, 'ri' =>-14123, 'rong' =>-14122, 'rou' =>-14112, 'ru' =>-14109, 'ruan' =>-14099, 'rui' =>-14097, 'run' =>-14094, 'ruo' =>-14092, 'sa' =>-14090, 'sai' =>-14087, 'san' =>-14083, 'sang' =>-13917, 'sao' =>-13914, 'se' =>-13910, 'sen' =>-13907, 'seng' =>-13906, 'sha' =>-13905, 'shai' =>-13896, 'shan' =>-13894, 'shang' =>-13878, 'shao' =>-13870, 'she' =>-13859, 'shen' =>-13847, 'sheng' =>-13831, 'shi' =>-13658, 'shou' =>-13611, 'shu' =>-13601, 'shua' =>-13406, 'shuai' =>-13404, 'shuan' =>-13400, 'shuang' =>-13398, 'shui' =>-13395, 'shun' =>-13391, 'shuo' =>-13387, 'si' =>-13383, 'song' =>-13367, 'sou' =>-13359, 'su' =>-13356, 'suan' =>-13343, 'sui' =>-13340, 'sun' =>-13329, 'suo' =>-13326, 'ta' =>-13318, 'tai' =>-13147, 'tan' =>-13138, 'tang' =>-13120, 'tao' =>-13107, 'te' =>-13096, 'teng' =>-13095, 'ti' =>-13091, 'tian' =>-13076, 'tiao' =>-13068, 'tie' =>-13063, 'ting' =>-13060, 'tong' =>-12888, 'tou' =>-12875, 'tu' =>-12871, 'tuan' =>-12860, 'tui' =>-12858, 'tun' =>-12852, 'tuo' =>-12849, 'wa' =>-12838, 'wai' =>-12831, 'wan' =>-12829, 'wang' =>-12812, 'wei' =>-12802, 'wen' =>-12607, 'weng' =>-12597, 'wo' =>-12594, 'wu' =>-12585, 'xi' =>-12556, 'xia' =>-12359, 'xian' =>-12346, 'xiang' =>-12320, 'xiao' =>-12300, 'xie' =>-12120, 'xin' =>-12099, 'xing' =>-12089, 'xiong' =>-12074, 'xiu' =>-12067, 'xu' =>-12058, 'xuan' =>-12039, 'xue' =>-11867, 'xun' =>-11861, 'ya' =>-11847, 'yan' =>-11831, 'yang' =>-11798, 'yao' =>-11781, 'ye' =>-11604, 'yi' =>-11589, 'yin' =>-11536, 'ying' =>-11358, 'yo' =>-11340, 'yong' =>-11339, 'you' =>-11324, 'yu' =>-11303, 'yuan' =>-11097, 'yue' =>-11077, 'yun' =>-11067, 'za' =>-11055, 'zai' =>-11052, 'zan' =>-11045, 'zang' =>-11041, 'zao' =>-11038, 'ze' =>-11024, 'zei' =>-11020, 'zen' =>-11019, 'zeng' =>-11018, 'zha' =>-11014, 'zhai' =>-10838, 'zhan' =>-10832, 'zhang' =>-10815, 'zhao' =>-10800, 'zhe' =>-10790, 'zhen' =>-10780, 'zheng' =>-10764, 'zhi' =>-10587, 'zhong' =>-10544, 'zhou' =>-10533, 'zhu' =>-10519, 'zhua' =>-10331, 'zhuai' =>-10329, 'zhuan' =>-10328, 'zhuang' =>-10322, 'zhui' =>-10315, 'zhun' =>-10309, 'zhuo' =>-10307, 'zi' =>-10296, 'zong' =>-10281, 'zou' =>-10274, 'zu' =>-10270, 'zuan' =>-10262, 'zui' =>-10260, 'zun' =>-10256, 'zuo' =>-10254 ); /** * 取汉字所有拼音 * @param string $chinese 要转换的汉字 * @param string $delimiter 分隔符 * @param int $length 返回的长度 * @return string */ public function getFullSpell( $chinese , $delimiter = '' , $length = 0){ $spell = $this ->getChineseSpells( $chinese , $delimiter ); if ( $length ){ $spell = substr ( $spell , 0, $length ); } return $spell ; } /** * 取汉字第【一个】汉字完整拼音 * @param string $chinese 要转换的汉字 * @param int $length 返回的长度 * @return string */ public function getFirstSpell( $chinese , $length = 0){ $spell = $this ->getChineseSpells( $chinese , ' ' , 1); if ( $length ){ $spell = substr ( $spell , 0, $length ); } return $spell ; } /** * 取一个汉字码对应的拼音 * @param int $num 汉字码 * @param string $blank 空白字符 * @return string */ public function getChineseSpell ( $num , $blank = '' ){ if ( $num >0 && $num <160){ return chr ( $num ); } elseif ( $num <-20319|| $num >-10247){ return $blank ; } else { foreach ( $this ->chineseSpellList as $spell => $code ){ if ( $code > $num ) break ; $result = $spell ; } return $result ; } } /** * 取汉字拼音 * @param string $chinese 要转换的汉字 * @param string $delimiter 分隔符 * @param int $first 是否只返回第一个 * @return string */ public function getChineseSpells( $chinese , $delimiter = '' , $first =0){ $result = array (); for ( $i =0; $i < strlen ( $chinese ); $i ++){ $p = ord( substr ( $chinese , $i ,1)); if ( $p >160){ $q = ord( substr ( $chinese ,++ $i ,1)); $p = $p *256 + $q - 65536; } $result [] = $this ->getChineseSpell( $p ); if ( $first ){ return $result [0]; } } return implode( $delimiter , $result ); } } $ok = new ChineseSpell(); $str = '%^&《》:“{}AGDFasdfas===取汉字所有拼音' ; echo $ok ->getFullSpell( $str ); echo '<p />' ; echo $ok ->getChineseSpells( $str ); ?> |
输出:
本文出自 “PHPer许琴-专注于PHP技术” 博客,请务必保留此出处http://xuqin.blog.51cto.com/5183168/1223888