• CDocument


     1 #ifndef _Document_H_040410_
     2 #define _Document_H_040410_
     3 
     4 #include <string>
     5 
     6 typedef struct{
     7         int docid;
     8         int offset;
     9 }DocIdx;
    10 
    11 using namespace std;
    12 
    13 class CDocument
    14 {
    15 public:
    16 
    17     int m_nDocId;
    18     int m_nPos;
    19     int m_nLength;
    20     string m_sChecksum;
    21 
    22     string m_sUrl;
    23     string m_sRecord;    // a record including a HEAD, a header and body
    24     string m_sHead;
    25     string m_sHeader;
    26     string m_sBody;
    27 
    28     string m_sBodyNoTags;
    29 
    30 public:
    31     CDocument();
    32     ~CDocument();
    33 
    34     bool ParseRecord(string &content) const;
    35     bool CleanBody(string &body) const;
    36 
    37     void RemoveTags(char *s);
    38 };
    39 
    40 #endif /* _Document_H_040410_ */
     1 /*Document handling
     2  */
     3 
     4 #include "Document.h"
     5 
     6 CDocument::CDocument()
     7 {
     8     m_nDocId = -1;
     9     m_nPos = -1;
    10     m_nLength = 0;
    11     m_sChecksum = "";
    12 
    13     m_sUrl = "";
    14 }
    15 
    16 CDocument::~CDocument()
    17 {
    18 }
    19 
    20 bool CDocument::ParseRecord(string &content) const
    21 {
    22     return true;
    23 }
    24 
    25 bool CDocument::CleanBody(string &body) const
    26 {
    27     return true;
    28 }
    29 
    30 //把  <...> 删掉
    31 void CDocument::RemoveTags(char *s)
    32 {
    33     int intag;
    34     char *p, *q;
    35 
    36     if (!s || !*s)    return;
    37 
    38     for (p=q=s, intag=0; *q; q++) {
    39         switch (*q){
    40         case '<':
    41             intag = 1;
    42             *p++ = ' ';
    43             break;
    44         case '>':
    45             intag = 0;
    46             break;
    47         default:
    48             if (!intag) {
    49                 *p++ = *q;
    50             }
    51             break;
    52         }
    53     }
    54 
    55     *p = '\0';
    56 
    57 /* second method
    58     char *d = s;
    59     while (*s) {
    60         if (*s == '<') {
    61             while (*s && *s!='>') s++;
    62             if( *s == '\0') break;
    63             s++; 
    64             continue; 
    65         } 
    66 
    67         *d++ = *s++; 
    68     }
    69     *d = 0;
    70 */
    71 }
  • 相关阅读:
    数据结构与算法分析(1)引论
    每天一个Linux命令(1)ls命令
    Oracle ->> 层级查询语句(hierarchical query)connect by
    SQL Server ->> GROUPING SETS, CUBE, ROLLUP, GROUPING, GROUPING_ID
    Oracle ->> 日期函数
    Oracle ->> 变量赋值 Demo
    SQL SERVER ->> BCP导出数据到平面文件
    SQL SERVER ->> CXPacket等待类型
    SQL SERVER ->> Wait Stats
    SQL SERVER ->> Columnstore Index
  • 原文地址:https://www.cnblogs.com/kakamilan/p/2591425.html
Copyright © 2020-2023  润新知