由于数据是从数据库中读取的,因此无法直接照搬原有示例。大体思路是利用之前分享的贝叶斯算法来实现该机制。在实现过程中发现,该算法对训练数据的质量要求较高;由于我自己的样本是随意分类的,所以结果很差。不过这也算是对该方法的一次实现,只要调整关键词和分类,应该可以得到较好的结果。
<?php
// Demo script: for each keyword, measures how the rows of the `test` table
// whose `title` contains that keyword are distributed over the category ids
// stored in `kindid`, passes those figures to the Bayes class, and prints
// the resulting posterior table followed by a dump of the Bayes object.
//
// NOTE(review): the mysql_* extension was removed in PHP 7. The calls are
// kept because the database connection is presumably opened by config1.php
// (not visible here); migrate both files to mysqli or PDO together.

include_once('config1.php');
include_once('Bayes.php');

// Minimum number of kind columns per keyword; the production program is
// expected to use 7.
$sunKindrow = 11;

// Total number of samples.
$result = mysql_query(" SELECT count(1) FROM `test` ");
if ($result === false) {
    die('Query failed: ' . mysql_error());
}
$row = mysql_fetch_array($result);
$count = (int) $row['count(1)'];

$keyword = array("编辑", "工程"); // keywords searched for in `title`
$keyprecent = array();            // per keyword: share of all samples matching it
$sunkind = array();               // per keyword: share of its matches in each kind

// Raw tallies, gathered with one grouped query per keyword.
$matchTotals = array(); // per keyword: number of matching rows
$kindTotals = array();  // per keyword: kindid => number of matching rows
$width = $sunKindrow;   // common row width; grows if a larger kindid shows up

foreach ($keyword as $i => $word) {
    $matchTotals[$i] = 0;
    $kindTotals[$i] = array();

    // The keywords are literals defined above. If they ever come from user
    // input they must be escaped before being placed in the LIKE pattern.
    $rows = mysql_query(
        "SELECT `kindid`, count(1) AS `cnt` FROM `test`"
        . " WHERE `title` LIKE '%" . $word . "%' GROUP BY `kindid`"
    );
    if ($rows === false) {
        die('Query failed: ' . mysql_error());
    }

    while ($r = mysql_fetch_array($rows)) {
        $cnt = (int) $r['cnt'];

        // Every match counts towards the denominator, even when its kindid
        // cannot be used as a column index.
        $matchTotals[$i] += $cnt;

        if (!is_numeric($r['kindid']) || (int) $r['kindid'] < 0) {
            continue;
        }
        $kind = (int) $r['kindid'];

        if (!isset($kindTotals[$i][$kind])) {
            $kindTotals[$i][$kind] = 0;
        }
        $kindTotals[$i][$kind] += $cnt;

        // Keep all rows the same width: Bayes takes the column count from
        // row 0 and indexes every row with it.
        if ($kind >= $width) {
            $width = $kind + 1;
        }
    }
}

// Turn the tallies into proportions, guarding against empty denominators.
foreach ($keyword as $i => $word) {
    $keyprecent[$i] = ($count > 0) ? $matchTotals[$i] / $count : 0;

    $sunkind[$i] = array();
    for ($j = 0; $j < $width; $j++) {
        $share = 0;
        if ($matchTotals[$i] > 0 && isset($kindTotals[$i][$j])) {
            $share = $kindTotals[$i][$j] / $matchTotals[$i];
        }
        // Drop low-probability data.
        $sunkind[$i][$j] = ($share < 0.1) ? 0 : $share;
    }
}

// Display section.
// Column labels: one per keyword (the hypotheses handed to Bayes).
$disease_labels = $keyword;

// Row labels: one per kind id (the evidence alternatives), "0", "1", ...
$symptom_labels = array();
for ($j = 0; $j < $width; $j++) {
    $symptom_labels[] = (string) $j;
}

$bayes = new Bayes($keyprecent, $sunkind);
$bayes->getPosterior();

echo "<p>";
echo "The probability of each disease type given the presence or absence of symptoms is: ";
echo "</p>";

$bayes->setRowLabels($symptom_labels);    // i.e., evidence
$bayes->setColumnLabels($disease_labels); // i.e., hypothesis
$bayes->toHTML();

echo "<br />";
echo "<p>";
echo "Here is what a dump of the Bayes object looks: ";
echo "</p>";
echo "<pre>";
print_r($bayes);
echo "</pre>";
?>
以下为所用的 Bayes.php 代码:
<?php
/**
 * Bayes
 *
 * @author  Paul Meagher <paul@datavore.com>
 * @license PHP License v3.0
 *
 * Calculates posterior probabilities for m hypotheses and n evidence
 * alternatives. The code was inspired by a procedural TrueBasic version
 * (Bayes.tru) bundled with Grimstead and Snell's excellent online
 * textbook "Introduction to Probability".
 *
 * @see http://www.dartmouth.edu/~chance/teaching_aids/books_articles/probability_book/book.html
 */
class Bayes
{
    /**
     * Number of hypothesis alternatives: the number of rows of the
     * likelihood matrix and of columns of the posterior matrix.
     */
    public $m;

    /**
     * Number of evidence alternatives: the number of columns of the
     * likelihood matrix and of rows of the posterior matrix.
     */
    public $n;

    /**
     * Output labels for evidence alternatives (table rows).
     */
    public $row_labels = array();

    /**
     * Output labels for hypothesis alternatives (table columns).
     */
    public $column_labels = array();

    /**
     * Vector of prior probabilities, indexed by hypothesis.
     */
    public $priors = array();

    /**
     * Matrix of likelihoods of evidence e given hypothesis h, indexed [h][e].
     */
    public $likelihoods = array();

    /**
     * Matrix of posterior probabilities, indexed [e][h].
     */
    public $posterior = array();

    /**
     * Vector of evidence probabilities, indexed by evidence alternative.
     */
    public $evidence = array();

    /**
     * Initialize the algorithm by storing the priors and likelihoods and
     * deriving the matrix dimensions from the likelihood matrix.
     *
     * The number of evidence alternatives is taken from row 0 of the
     * likelihood matrix; an empty matrix yields zero for both dimensions.
     *
     * @param array $priors      prior probability per hypothesis
     * @param array $likelihoods likelihoods indexed [hypothesis][evidence]
     */
    function __construct($priors, $likelihoods)
    {
        $this->priors = $priors;
        $this->likelihoods = $likelihoods;
        $this->m = count($likelihoods);
        $this->n = isset($likelihoods[0]) ? count($likelihoods[0]) : 0;
    }

    /**
     * Legacy PHP 4-style constructor, kept as an alias for code that calls
     * it by name. PHP 8 no longer treats a method named after the class as
     * a constructor, so the initialization lives in __construct().
     *
     * @deprecated use `new Bayes($priors, $likelihoods)`
     *
     * @param array $priors      prior probability per hypothesis
     * @param array $likelihoods likelihoods indexed [hypothesis][evidence]
     * @return bool always true
     */
    function Bayes($priors, $likelihoods)
    {
        $this->__construct($priors, $likelihoods);
        return true;
    }

    /**
     * Output method for setting row (evidence) labels prior to display.
     *
     * @param array $row_labels
     * @return bool always true
     */
    function setRowLabels($row_labels)
    {
        $this->row_labels = $row_labels;
        return true;
    }

    /**
     * Output method for setting column (hypothesis) labels prior to display.
     *
     * @param array $column_labels
     * @return bool always true
     */
    function setColumnLabels($column_labels)
    {
        $this->column_labels = $column_labels;
        return true;
    }

    /**
     * Product prior[$h] * likelihood[$h][$e]; a missing entry counts as 0.
     */
    private function jointProbability($h, $e)
    {
        if (!isset($this->priors[$h]) || !isset($this->likelihoods[$h][$e])) {
            return 0;
        }
        return $this->priors[$h] * $this->likelihoods[$h][$e];
    }

    /**
     * Compute the posterior probability matrix from the priors and
     * likelihoods.
     *
     * The first pass computes the denominator of the canonical Bayes
     * equation: the probability of each evidence alternative, which
     * normalizes the posteriors of that alternative so they sum to 1.
     *
     * The second pass multiplies prior[$h] by likelihood[$h][$e], divides by
     * that denominator and stores the result in posterior[$e][$h].
     *
     * When an evidence alternative has probability 0 the posteriors are
     * undefined; they are stored as 0.0 instead of dividing by zero.
     *
     * @return bool always true
     */
    function getPosterior()
    {
        // Probability of each evidence alternative e.
        for ($e = 0; $e < $this->n; $e++) {
            $total = 0;
            for ($h = 0; $h < $this->m; $h++) {
                $total += $this->jointProbability($h, $e);
            }
            $this->evidence[$e] = $total;
        }

        // Probability of each hypothesis h given evidence e.
        for ($e = 0; $e < $this->n; $e++) {
            for ($h = 0; $h < $this->m; $h++) {
                if ($this->evidence[$e] != 0) {
                    $this->posterior[$e][$h] =
                        $this->jointProbability($h, $e) / $this->evidence[$e];
                } else {
                    $this->posterior[$e][$h] = 0.0;
                }
            }
        }

        return true;
    }

    /**
     * Output method for displaying the posterior probability matrix as an
     * HTML table: one column per hypothesis, one row per evidence
     * alternative. A missing label falls back to the numeric index. Labels
     * are written as given, without HTML escaping.
     *
     * @param string $number_format printf format applied to each cell
     */
    function toHTML($number_format = "%01.3f")
    {
        echo "<table border='1' cellpadding='5' cellspacing='0'>\n";

        // Header row: an empty corner cell, then the hypothesis labels.
        echo "<tr> <td> </td>\n";
        for ($h = 0; $h < $this->m; $h++) {
            $label = isset($this->column_labels[$h]) ? $this->column_labels[$h] : $h;
            echo "<td align='center'><b>" . $label . "</b></td>\n";
        }
        echo "</tr>\n";

        // One row per evidence alternative.
        for ($e = 0; $e < $this->n; $e++) {
            $label = isset($this->row_labels[$e]) ? $this->row_labels[$e] : $e;
            echo "<tr> <td><b>" . $label . "</b></td>\n";
            for ($h = 0; $h < $this->m; $h++) {
                echo "<td align='right'>";
                printf($number_format, $this->posterior[$e][$h]);
                echo "</td>\n";
            }
            echo "</tr>\n";
        }

        echo "</table>\n";
    }
}
?>