• 一个用PHP写的中文分词函数


    <?php

    class Segmentation {
    // Behaviour flags toggled through setLowercase() and setSegmentEnglish().
    // NOTE(review): presumably consulted by the per-line segmentation code,
    // which is not visible in this part of the file - confirm.
    var $options = array('lowercase' => TRUE,
    'segment_english' => FALSE);
    // Name of the loaded dictionary; load() overwrites it from the file header.
    var $dict_name = 'Unknown';
    // Dictionary lookup table: load() stores every word as a key with value 1.
    var $dict_words = array();
    /**
     * Switches the 'lowercase' option on or off.
     *
     * @param mixed $value Any value; its truthiness decides the flag.
     * @return bool Always TRUE.
     */
    function setLowercase($value) {
        // Normalise whatever was passed to a strict boolean before storing it.
        $this->options['lowercase'] = (bool) $value;
        return TRUE;
    }
    /**
     * Switches the 'segment_english' option on or off.
     *
     * @param mixed $value Any value; its truthiness decides the flag.
     * @return bool Always TRUE.
     */
    function setSegmentEnglish($value) {
        $enabled = $value ? TRUE : FALSE;
        $this->options['segment_english'] = $enabled;
        return TRUE;
    }
    /**
     * Loads a word dictionary from a text file into $this->dict_words.
     *
     * The first line of the file is a header: the dictionary type, optionally
     * followed by a tab character and a dictionary name. Only the type
     * 'DICT_WORD_W' is accepted. Each following line holds one word, which is
     * stored as a key of $this->dict_words with the value 1.
     *
     * $this->dict_name is updated from the header even when the type is then
     * rejected.
     *
     * @param string $dict_file Path of the dictionary file.
     * @return bool TRUE on success; FALSE when the file is missing, cannot be
     *              opened, is empty, or declares an unsupported type.
     */
    function load($dict_file) {
        if (!file_exists($dict_file)) {
            return FALSE;
        }
        $fp = fopen($dict_file, 'r');
        if ($fp === FALSE) {
            return FALSE;
        }
        $header = fgets($fp, 1024);
        if ($header === FALSE) {
            // Empty file: release the handle before bailing out.
            fclose($fp);
            return FALSE;
        }
        // Trim first so a trailing tab cannot produce an empty name field.
        $header = trim($header);
        if (strpos($header, "\t") !== FALSE) {
            // Header fields are tab-separated: "<type>\t<name>".
            list ($dict_type, $dict_name) = explode("\t", $header);
        } else {
            $dict_type = $header;
            $dict_name = 'Unknown';
        }
        $this->dict_name = $dict_name;
        if ($dict_type !== 'DICT_WORD_W') {
            fclose($fp);
            return FALSE;
        }
        // Loop on the fgets() result rather than feof(), so the failed read at
        // end of file is never turned into a bogus dictionary entry.
        // fgets() is capped at 31 bytes per call, so longer lines arrive in pieces.
        while (($line = fgets($fp, 32)) !== FALSE) {
            $word = rtrim($line);
            if ($word !== '') {
                // Blank lines are not words; skip them.
                $this->dict_words[$word] = 1;
            }
        }
        fclose($fp);
        return TRUE;
    }
    /**
     * Returns the dictionary name currently held in $this->dict_name.
     *
     * @return string The stored dictionary name.
     */
    function getDictName() {
        $name = $this->dict_name;
        return $name;
    }
    /**
     * Segments a block of text held in a string.
     *
     * The text is split into lines on the newline character and the resulting
     * list is handed to _segmentLines().
     *
     * @param string $str Text to segment; may span several lines.
     * @return mixed FALSE when no dictionary words are loaded, otherwise the
     *               value returned by _segmentLines().
     */
    function segmentString($str) {
        if (count($this->dict_words) === 0) {
            return FALSE;
        }
        // Split on real line breaks ("\n"), not on the letter "n".
        $lines = explode("\n", $str);
        return $this->_segmentLines($lines);
    }
    /**
     * Segments the contents of a text file.
     *
     * The file is read line by line with file() and the resulting list is
     * handed to _segmentLines().
     *
     * @param string $filename Path of the file to segment.
     * @return mixed FALSE when no dictionary words are loaded or the file
     *               cannot be read, otherwise the value returned by
     *               _segmentLines().
     */
    function segmentFile($filename) {
        if (count($this->dict_words) === 0) {
            return FALSE;
        }
        $lines = file($filename);
        if ($lines === FALSE) {
            // file() failed; report it instead of iterating over a non-array.
            return FALSE;
        }
        return $this->_segmentLines($lines);
    }
    function _segmentLines($lines) {
    $contents_segmented = '';
    foreach ($lines as $line) {
    $contents_segmented .= $this->_segmentLine(rtrim($line)) . " n";
    }
    do {
    $contents_segmented = str_replace(' ', ' ', $contents_segmented);
    } while (strpos($contents_segmented, ' ') !== FALSE);
    return $contents_segmented;?>

  • 相关阅读:
    Django + Uwsgi + Nginx 的概念
    ubuntu+nginx+uwsgi+django部署
    FileZilla以root用户登录Linux
    全文检索django-haystack+jieba+whoosh
    七牛云上传视频
    JWT登录与多方式登录
    vue绑定用户页面
    绑定微博用户接口
    vue微博回调空页面
    微博回调接口
  • 原文地址:https://www.cnblogs.com/ymj0906/p/3003497.html
Copyright © 2020-2023  润新知