• [重点]delphi 实现 根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、副题、作者和正文。


    项目要求:根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、作者和正文。

    
    
    unit Unit1;
    
    interface
    
    uses
      Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
      Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, IdBaseComponent,
      IdComponent, IdTCPConnection, IdTCPClient, IdHTTP, Vcl.ComCtrls;
    
    type
      TForm1 = class(TForm)
        IdHTTP1: TIdHTTP;
        Button1: TButton;
        Label1: TLabel;
        Edit1: TEdit;
        ProgressBar1: TProgressBar;
        Memo1: TMemo;
        Button2: TButton;
        Memo2: TMemo;
        procedure Button1Click(Sender: TObject);
        procedure FormCreate(Sender: TObject);
        procedure Button2Click(Sender: TObject);
      private
        { Private declarations }
      public
        { Public declarations }
      end;
    
    var
      Form1: TForm1;
    
    implementation
           uses StrUtils,HttpApp;
    {$R *.dfm}
    
    type
      TDelFlags = set of (dfDelBefore, dfDelAfter);
    
    
    function Delstr(var ms: String; endstr: String; Flags: TDelFlags;
      bself: Boolean = True): String;
    var
      l: Integer;
    begin
      l := length(endstr);
      if dfDelBefore in Flags then
      begin
        if bself then
        begin
          Result := copy(ms, 1, pos(endstr, ms) + l - 1);
          Delete(ms, 1, pos(endstr, ms) + l - 1);
        end
        else
        begin
          Result := copy(ms, 1, pos(endstr, ms) - 1);
          Delete(ms, 1, pos(endstr, ms) - 1);
        end;
      end
      else
      begin
        if bself then
        begin
          Result := copy(ms, pos(endstr, ms), length(ms));
          Delete(ms, pos(endstr, ms), length(ms));
        end
        else
        begin
          Result := copy(ms, pos(endstr, ms) + l, length(ms));
          Delete(ms, pos(endstr, ms) + l, length(ms));
        end;
      end;
    end;
    
    procedure DelstrEx(var ms: String; endstr: String;
      var DelData: String; Flags: TDelFlags; bself: Boolean = True);
    var
      l: Integer;
    begin
      l := length(endstr);
      if dfDelBefore in Flags then
      begin           //删除字符串的前半部分
        if bself then //连同自己一起删除
        begin
          DelData := copy(ms, 1, pos(endstr, ms) + l - 1);
          Delete(ms, 1, pos(endstr, ms) + l - 1);
        end
        else
        begin
          DelData := copy(ms, pos(endstr, ms) - 1, length(ms));
          Delete(ms, 1, pos(endstr, ms) - 1);
        end;
      end
      else
      begin
        if bself then
        begin
          DelData := copy(ms, pos(endstr, ms), length(ms));
          Delete(ms, pos(endstr, ms), length(ms)); //连同自己一起删除
        end
        else
        begin
          DelData := copy(ms, pos(endstr, ms) + l, length(ms));
          Delete(ms, pos(endstr, ms) + l, length(ms));
        end;
      end;
    end; {DelstrEx}
    
    
    function GetCenterStr(src, str1, str2: String): String;
    var
      i, i2, i3: Integer;
    begin
      i := 0;
      i2 := 0;
      i3 := 0;
      Delstr(src, str1, [dfDelBefore]);
      i := pos(AnsiLowercase(str1), AnsiLowercase(src));
      i3 := pos(AnsiLowercase(str2), AnsiLowercase(src));
      Result := copy(src, i2 + 1, i3 - i2 - 1);
    end;
    
    
    function delstrByNum(ss:string;uniqueFlag:string;disapperNum:integer;FromFlags: TDelFlags;bReturnDeletedPart:boolean):string;
    var _num:integer;
        _Str:string;
    begin
         _num:=0;
         _Str:=ss;
    
         result:='';
    
         while _num<disapperNum do
         begin
             if dfDelBefore in FromFlags then   //从字符串左端开始删除
             begin
                delstr(_Str,uniqueFlag,FromFlags);
             end
             else
             begin  //从字符串右端开始删除
               _Str:= StrUtils.ReverseString(_Str) ;
    
               if bReturnDeletedPart then
                  delstrEx(_Str,StrUtils.ReverseString(uniqueFlag),result,[dfdelbefore])
               else
                  delstr(_Str,StrUtils.ReverseString(uniqueFlag),[dfdelbefore]);
    
                 _Str:= StrUtils.ReverseString(_Str) ;
             end;
    
              inc(_num);
         end;
    
         if result='' then result:=_Str
         else  result:= StrUtils.ReverseString(result) ;
    end;
    
    
    
    
    function Matchstrings(Source, pattern: String): Boolean;
    var
      pSource: array[0..255] of Char;
      pPattern: array[0..255] of Char;
      function MatchPattern(element, pattern: PChar): Boolean;
        function IsPatternWild(pattern: PChar): Boolean;
        begin
          Result := StrScan(pattern, '*') <> nil;
          if not Result then
            Result := StrScan(pattern, '?') <> nil;
        end;
      begin
        if 0 = StrComp(pattern, '*') then
          Result := True
        else if (element^ = Chr(0)) and (pattern^ <> Chr(0)) then
          Result := False
        else if element^ = Chr(0) then
          Result := True
        else
        begin
          case pattern^ of
            '*':
              if MatchPattern(element, @pattern[1]) then
                Result := True
              else
                Result := MatchPattern(@element[1], pattern);
              '?':
              Result := MatchPattern(@element[1], @pattern[1]);
            else
              if element^ = pattern^ then
                Result := MatchPattern(@element[1], @pattern[1])
              else
                Result := False;
          end;
        end;
      end;
    begin
      StrPCopy(pSource, Source);
      StrPCopy(pPattern, pattern);
      Result := MatchPattern(pSource, pPattern);
    end; {匹配字符串函数}
    
    
    {从磁盘中搜索指定类型的所有文件}
    procedure FindFiles(ASourceDir, SearchFileType: String; var List: TStrings);
    var
      FileRec: TSearchrec;
      Sour, OldFileName, NewFileName: String;
      fs: TFileStream;
    begin
      Sour := ASourceDir;
      if Sour[length(Sour)] <> '' then
        Sour := Sour + '';
      if FindFirst(Sour + '*.*', faAnyfile, FileRec) = 0 then
        {循环}
        repeat
          if ((FileRec.Attr and faDirectory) <> 0) then
          begin
            if (FileRec.Name <> '.') and (FileRec.Name <> '..') then //找到目录
            begin
              FindFiles(Sour + FileRec.Name, SearchFileType, List);
            end;
          end
          else //找到文件
          begin
            if Matchstrings(LowerCase(FileRec.Name), SearchFileType) then
            begin
              List.Add(Sour + FileRec.Name);
            end; {拷贝所有类型的文件}
          end;
        until FindNext(FileRec) <> 0;
      system.SysUtils.FindClose(FileRec);
    end; {从磁盘中搜索指定类型的所有文件}
    
    
    
    procedure RmHtmlTags(var src: string);
      function DelTag(var src: string): boolean;
      var
        iPosS, iPosE: integer;
      begin
        result := False;
        if pos('<script', AnsiLowerCase(src)) > 0 then
          begin
            iPosS := pos('<script', AnsiLowerCase(src));
            if iPosS > 0 then
              begin
                iPosE := pos('</script>', AnsiLowerCase(src));
                result := iPosE > iPosS;
                if result then
                  Delete(src, iPosS, iPosE - iPosS + 9);
              end;
          end
        else
          begin
            iPosS := pos('<', src);
            if iPosS > 0 then
              begin
                iPosE := pos('>', src);
                result := iPosE > iPosS;
                if result then
                  Delete(src, iPosS, iPosE - iPosS + 1);
              end;
          end;
      end;
    begin
      //src := LowerCase(src);
      src := src;
      repeat
      until not DelTag(src);
    end;
    
    procedure RmHtmlTagsEx(var src: string);
      function DelTag(var src: string): boolean;
      var
        iPosS, iPosE: integer;
      begin
        result := False;
        if pos('<script', AnsiLowerCase(src)) > 0 then
          begin
            iPosS := pos('<script', AnsiLowerCase(src));
            if iPosS > 0 then
              begin
                iPosE := pos('</script>', AnsiLowerCase(src));
                result := iPosE > iPosS;
                if result then
                  Delete(src, iPosS, iPosE - iPosS + 9);
              end;
          end
        else
        if pos('<style', AnsiLowerCase(src)) > 0 then
          begin
            iPosS := pos('<style', AnsiLowerCase(src));
            if iPosS > 0 then
              begin
                iPosE := pos('</style>', AnsiLowerCase(src));
                result := iPosE > iPosS;
                if result then
                  Delete(src, iPosS, iPosE - iPosS + 9);
              end;
          end
        else
          begin
           { iPosS := pos('<', src);
            if iPosS > 0 then
              begin
                iPosE := pos('>', src);
                result := iPosE > iPosS;
                if result then
                  Delete(src, iPosS, iPosE - iPosS + 1);
              end; }
          end;
      end;
    begin
      //src := LowerCase(src);
      src := src;
      repeat
      until not DelTag(src);
    end;
    
    
    function UrlDecoder(const AUrl:string):string;
    begin
      result:= UTF8Decode(HttpDecode(AUrl));
    end;
    
    function UrlEncoder(const AUrl:string):string;
    begin
    //URL编码通常使用“+”来替换空格。
      result:=HttpEncode(UTF8Encode(AUrl));
    end;
    
    
    function  getResURL(http:TIdHttp;searchWord:string):string;
    var info:tstringlist;
       res:tstringstream;
       tURL:string;
      MemoText: string;
    begin
       http.HandleRedirects:=true;
       http.Request.UserAgent:='Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.2)';
       http.Request.Host:='search.cyol.com';
       http.Request.ContentType:='application/x-www-form-urlencoded';
       http.Request.Referer:='http://search.cyol.com/index.htm';
       http.request.CacheControl:='no-cache';
       http.HTTPOptions:=http.HTTPOptions+[hoKeepOrigProtocol];
    
       try
          info:=tstringlist.Create;
         res:=tstringstream.Create('',TEncoding.UTF8);
    
        {
          info.Add('op=new');
         info.Add('searchBtn=搜索');
         info.Add('searchText='+searchWord); //全站内模糊搜索
         // info.Add('searchText=一日为师 终身挨骂?');
        }
         info.Add('ak=');
         info.Add('ck=');
         info.Add('df=');
         info.Add('dt=');
         info.Add('nk=4');
         info.Add('od=date');
         info.Add('op=adv');
         info.Add('tk='+searchWord);
    
         tURL:='http://search.cyol.com/searchh.jsp';
         http.Post(tURL,info,res);
         MemoText:= res.DataString;
    
         delstr(MemoText,'resultdiv',[dfdelbefore]);
    
         //showmessage(MemoText);
    
         if pos('color:red',ansilowercase(MemoText))=0 then
         begin
              result:='';
              Exit;
         end;
    
    
         delstr(MemoText,'>',[dfdelbefore]);
         delstr(MemoText,'<a',[dfdelbefore]);
         delstr(MemoText,'http:',[dfdelbefore],false);
         delstr(MemoText,'.htm',[dfdelafter],false);
    
    
         result:=MemoText;
    
    
       finally
          freeandnil(info);
          freeandnil(res);
          //http.Free;
       end;
    end;
    
    function getHtmlStr(http:TIdHttp;fURL:string):string;
    begin
       if assigned(http) and (http is TIdHttp) and (http<>nil) then
        result:=  http.Get(fURL);
    end;
    
    
    
    procedure TForm1.Button1Click(Sender: TObject);
    
    var htmlText:string;
      biaoti: string;
      Author: string;
      yinti: string;
      table_Pos: Integer;
      ss: string;
      outdata: string;
      neirong: string;
      zhenwen: string;
      frontPart: string;
      subtitle: string;
      txtList: TStrings;
      i: Integer;
      readtxt: TStringList;
      zhenti: string;
      resURL: string;
    
    begin
       button1.Caption:='正在处理'; button1.Enabled:=false;
    
     { htmlText:=  getHtmlStr(idHTTP1, getResURL(idHTTP1,'一日为师 终身挨骂?') );
    
      frontPart:=htmlText;
    
      delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
      delstr(frontPart,'/enpproperty',[dfdelafter]);
    
      Author:=  GetCenterStr(frontPart,'<author>','</author>');    //作者
      subtitle:=  GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
      yinti:=  GetCenterStr(frontPart,'<introtitle>','</introtitle>');  //引题
    
    
      //取正文
      zhenwen:=htmlText;
      delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
      delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);
      Memo1.Text:=zhenwen;
    
      }
    
      if not directoryExists(edit1.Text) then
      begin
    
         showmessage('请输入标引txt的路径!');
        exit;
      end;
    
    
      txtList:=tstringlist.Create ;
      readtxt:=TStringlist.Create ;
      findfiles(edit1.Text,'*.txt',txtList);
    
      ProgressBar1.Position:=0;
      ProgressBar1.Max:=txtlist.Count;
    
    
    
      try
    
      for i := 0 to txtList.Count-1 do
      begin
           application.ProcessMessages ;
           ProgressBar1.Position:=i+1;
    
           readtxt.LoadFromFile(txtList[i]);
    
            zhenti:=readtxt.Values['<主题>'];
    
            htmlText:='';  zhenwen:='';
            author:='';subtitle:=''; yinti:='';
    
    
            resURL:=getResURL(idHTTP1,trim(zhenti));
    
            if ''<>trim(resURL) then
            begin
    
                htmlText:=  getHtmlStr(idHTTP1,  resURL);
    
                frontPart:=htmlText;
    
                delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
                delstr(frontPart,'/enpproperty',[dfdelafter]);
    
                Author:=  GetCenterStr(frontPart,'<author>','</author>');    //作者
                subtitle:=  GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
                yinti:=  GetCenterStr(frontPart,'<introtitle>','</introtitle>');  //引题
    
                //取正文
                zhenwen:=htmlText;
                delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
                delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);
    
                RmHtmlTagsEx(zhenwen);
    
                if ''<>trim(yinti) then readtxt.Values['<引题>']:=yinti;
                if ''<>trim(subtitle) then readtxt.Values['<副题>']:=subtitle;
                if ''<>trim(author) then readtxt.Values['<作者>']:=author;
                if ''<>trim(zhenwen) then readtxt.Values['<正文>']:=slinebreak+trim(zhenwen);
    
                readtxt.SaveToFile(txtList[i]);
    
                readtxt.Clear ;
            end
            else
            begin
                 Memo2.Lines.Add('未找到对应数据:'+txtList[i]);
            end;
    
      end; // for i end
    
      if ProgressBar1.Max=ProgressBar1.Position then
      begin
          showmessage('处理完成!');
      end;
      finally
         button1.Caption:='开始处理'; button1.Enabled:=true;
          freeandnil(readtxt);
          freeandnil(txtlist);
      end;
    
    
    
    
    
    
    
    
    
    
    {  delstr(htmlText,'<body',[dfdelbefore]);
      biaoti:='biaoti';
      //取作者
      Author:=htmlText;
      delstr(Author,biaoti,[dfdelbefore]);
      delstr(Author,'rc-writer',[dfdelbefore]);
      delstr(Author,'>',[dfdelbefore]);
      delstr(Author,'<',[dfdelafter]);
    
      showmessage(Author);
    
      //取引题
      yinti:=htmlText;
      delstr(yinti,biaoti,[dfdelafter]);
      table_Pos:=0;
     //example:   ss:='<table>ccc</table><table>ddd</table>';
       yinti:=delstrByNum(yinti,'<table',1,[dfdelafter],true)+'>';
       RmHtmlTags(yinti);
       showmessage(yinti );
    
     //取正文内容
     neirong:='neirong';
     zhenwen:=htmlText;
     delstr(zhenwen,neirong,[dfdelbefore]);
     delstr(zhenwen,'<P',[dfdelbefore],false);
     delstr(zhenwen,'<script',[dfdelafter]);
     Memo1.Text:=zhenwen;
     }
    
    
    
    
    
    
    
    
    
    
    
    end;
    
    procedure TForm1.Button2Click(Sender: TObject);
    var
      ss: string;
    begin
       ss:=Memo1.Text;
       RmHtmlTagsEx(ss);
       memo1.Text:=ss;
    end;
    
    procedure TForm1.FormCreate(Sender: TObject);
    begin
    edit1.Clear ;
    memo2.Clear ;
    end;
    
    end.
    
    
    
     
  • 相关阅读:
    PIE-Basic 频率域滤波
    使用CefSharp前端后台交换
    CefSharp F12打开DevTools查看console js和c#方法互相调用
    js和C#互相调用
    C# 矢量图EMF 总结
    key
    关于IdentityServer4不使用MVC页面进行登录(跨域发送验证请求)的一些问题(前后端分离的验证)
    C#版的省份编码字典
    EFCore显示加载模式下,自动包含导航属性(只包含第一层的导航属性)的方法
    EFCore批量实现全局查询筛选器
  • 原文地址:https://www.cnblogs.com/yzryc/p/6494161.html
Copyright © 2020-2023  润新知