• 获取新浪读书频道,书的列表程序


    using System;
    using System.Net;
    using System.IO;
    using System.Text;
    using System.Collections;
    using System.Text.RegularExpressions;

    namespace ConsoleApplication1
    {
        
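    /// <summary>
    /// Console program that downloads a book index page from the Sina reading
    /// channel and generates an HTML table of links from it.
    /// </summary>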

        class Class1
        
    {
            
    /// <summary>
    /// Application entry point. Downloads the book's index page from the Sina
    /// reading channel, extracts the link text for pages 1..84, and writes an
    /// HTML table of links to index.html on the current user's desktop.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    [STAThread]
    static void Main(string[] args)
    {
        const int pageCount = 84;      // linked pages are numbered 1..84
        const int anchorWindow = 80;   // characters inspected from the start of each anchor tag

        string indexUrl = "http://book.sina.com.cn/nzt/lit/dixiashi/index.shtml";
        string pageHtml = Gethtml(indexUrl);

        // Slot 0 of each dimension is unused: link text goes in [i,0], link target in [0,i].
        string[,] arr = new string[pageCount + 1, pageCount + 1];
        arr[0, 0] = "";
        for (int i = 1; i <= pageCount; i++)
        {
            string anchor = "<a href=/nzt/lit/dixiashi/" + i + ".shtml target=_blank class=a03>";
            string linkText;
            try
            {
                linkText = GetChinese("[\u4e00-\u9fa5]", MID(pageHtml, anchor, anchorWindow));
            }
            catch (ArgumentOutOfRangeException)
            {
                // One anchor missing from the downloaded page should not abort the whole run.
                Console.Error.WriteLine("Link text for page " + i + " could not be extracted; leaving it empty.");
                linkText = "";
            }

            arr[i, 0] = linkText;
            arr[0, i] = "Books_zg_" + i + ".html";
        }

        string listHtml = Getlist(4, 700, arr);

        // Resolve the desktop of whoever runs the program instead of hard-coding
        // one account's profile path on one Windows locale.
        string desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
        Writefile(Path.Combine(desktop, "index.html"), listHtml);
    }


            
    /// <summary>
    /// Returns up to <paramref name="length"/> characters of
    /// <paramref name="Content"/>, beginning at the first occurrence of
    /// <paramref name="StartString"/> (the matched text itself is included).
    /// </summary>
    /// <param name="Content">Text to search.</param>
    /// <param name="StartString">Marker whose first occurrence starts the extract.</param>
    /// <param name="length">Maximum number of characters to return.</param>
    /// <returns>
    /// The extracted text. It is shorter than <paramref name="length"/> when
    /// <paramref name="Content"/> ends first, and empty when
    /// <paramref name="StartString"/> does not occur.
    /// </returns>
    /// <exception cref="ArgumentNullException">Content or StartString is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
    private static string MID(string Content, string StartString, int length)
    {
        if (Content == null)
        {
            throw new ArgumentNullException("Content");
        }
        if (StartString == null)
        {
            throw new ArgumentNullException("StartString");
        }
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException("length", "length must not be negative.");
        }

        int start = Content.IndexOf(StartString);
        if (start < 0)
        {
            // Marker not present: Substring(-1, ...) would throw, so report "nothing found".
            return "";
        }

        // Clamp so a match near the end of Content yields the remaining tail instead of throwing.
        int available = Content.Length - start;
        return Content.Substring(start, Math.Min(length, available));
    }


            
    /// <summary>
    /// Downloads the resource at the given URL and returns its content decoded
    /// as GB2312 text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The complete response body as a string.</returns>
    private static string Gethtml(string url)
    {
        // Dispose the client, the network stream and the reader even when reading fails.
        using (WebClient client = new WebClient())
        using (Stream body = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(body, System.Text.Encoding.GetEncoding("GB2312")))
        {
            return reader.ReadToEnd();
        }
    }


            
    /// <summary>
    /// Fetches the page at the given URL with an HTTP GET request and returns
    /// the response body decoded as GB2312 text.
    /// </summary>
    /// <param name="URL">Absolute HTTP address of the page.</param>
    /// <returns>The complete response body as a string.</returns>
    static string GetPageSource(string URL)
    {
        Uri uri = new Uri(URL);

        HttpWebRequest hwReq = (HttpWebRequest)WebRequest.Create(uri);

        // Request settings must be applied before GetResponse() sends the request;
        // assigning them afterwards has no effect on what went over the wire.
        hwReq.Method = "GET";
        hwReq.KeepAlive = false;

        // Dispose the response and reader so the underlying connection is released.
        using (HttpWebResponse hwRes = (HttpWebResponse)hwReq.GetResponse())
        using (StreamReader reader = new StreamReader(hwRes.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312")))
        {
            return reader.ReadToEnd();
        }
    }



            
    /// <summary>
    /// Renders the link data as a complete HTML page containing one table of
    /// links, laid out row by row.
    /// </summary>
    /// <param name="col">Number of columns in the table; must be positive.</param>
    /// <param name="ww">Width of the table; each cell gets floor(ww / col).</param>
    /// <param name="arr">
    /// Link data: for item i (starting at 1) arr[i,0] is the link text and
    /// arr[0,i] is the link target. Index 0 is never rendered.
    /// </param>
    /// <returns>The generated HTML document.</returns>
    /// <exception cref="ArgumentNullException">arr is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">col is not positive.</exception>
    static string Getlist(int col, int ww, string[,] arr)
    {
        if (arr == null)
        {
            throw new ArgumentNullException("arr");
        }
        if (col <= 0)
        {
            throw new ArgumentOutOfRangeException("col", "col must be positive.");
        }

        // Index 0 is not an item, so it must not count toward the number of rows;
        // counting it produced an extra, completely empty row whenever the real
        // item count was an exact multiple of col.
        int itemCount = arr.GetLength(0) - 1;
        int rowCount = itemCount > 0 ? (itemCount + col - 1) / col : 0;
        int targetCount = arr.GetLength(1);

        int cellWidth = (int)System.Math.Floor(ww / Convert.ToDouble(col)); // width of each column

        StringBuilder sb = new StringBuilder();
        sb.Append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=gb2312\"><title>书的列表</title><style type=\"text/css\"><!--BODY {SCROLLBAR-FACE-COLOR: #c5c5c5; MARGIN: 0px; FONT: 12px 宋体; SCROLLBAR-HIGHLIGHT-COLOR: #c5c5c5; SCROLLBAR-SHADOW-COLOR: #c5c5c5; SCROLLBAR-3DLIGHT-COLOR: #c5c5c5; SCROLLBAR-ARROW-COLOR: #ffffff; SCROLLBAR-TRACK-COLOR: #fffffd; SCROLLBAR-DARKSHADOW-COLOR: #c5c5c5;font-size:13px;}A.a03:link {COLOR: #1E1E9C; TEXT-DECORATION: underline}A.a03:visited {    COLOR: #6d6e71; TEXT-DECORATION: none}A.a03:active {COLOR: #ff0000; TEXT-DECORATION: none}A.a03:hover {COLOR: #ff0000; TEXT-DECORATION: none}td{font-size:13px;}--></style></head><body><table width=\"" + ww + "\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\">");

        int index = 0;
        for (int row = 0; row < rowCount; row++)
        {
            sb.Append("<tr>");
            for (int column = 0; column < col; column++)
            {
                index++;
                sb.Append("<td width=\"" + cellWidth + "\">");

                // Cells past the last item (or past the second dimension) stay empty;
                // an explicit bounds check replaces the former swallowed exception.
                if (index <= itemCount && index < targetCount)
                {
                    sb.Append("<a href=\"" + arr[0, index] + "\" target=\"_blank\" class=\"a03\">" + arr[index, 0] + "</a>");
                }

                sb.Append("</td>");
            }

            sb.Append("</tr>");
            // Thin spacer row between rows of links.
            sb.Append("<tr><td colspan=\"" + col + "\" height=\"7\"></td></tr>");
        }

        sb.Append("</table></body></html>");

        return sb.ToString();
    }



            
    /// <summary>
    /// Writes text to a file encoded as GB2312, replacing any existing content.
    /// </summary>
    /// <param name="path">File to create or overwrite.</param>
    /// <param name="text">Content to write.</param>
    private static void Writefile(string path, string text)
    {
        // Per the original note: GB2312 is used for the Chinese text and because QuickCHM supports it.
        System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");

        StreamWriter output = new StreamWriter(path, false, gb2312);
        try
        {
            output.Write(text);
        }
        finally
        {
            // Flushes and closes the file whether or not the write succeeded.
            ((IDisposable)output).Dispose();
        }
    }


            
    /// <summary>
    /// Extracts the distinct absolute http:// and https:// URLs that occur in
    /// the given HTML text.
    /// </summary>
    /// <param name="htmlCode">HTML source to scan.</param>
    /// <returns>
    /// An <see cref="ArrayList"/> of URL strings with duplicates removed,
    /// sorted with the list's default comparer.
    /// </returns>
    static ArrayList GetHyperLinks(string htmlCode)
    {
        // "https?" also picks up secure links, which the former "http://" prefix skipped.
        string strRegex = @"https?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
        Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);

        ArrayList al = new ArrayList();

        // Hash lookup for the duplicate check instead of rescanning the list for every match.
        Hashtable seen = new Hashtable();

        foreach (Match match in r.Matches(htmlCode))
        {
            string link = match.Value;
            if (!seen.ContainsKey(link))
            {
                seen.Add(link, null);
                al.Add(link);
            }
        }

        al.Sort();

        return al;
    }



            
    /// <summary>
    /// Concatenates every match of the pattern <paramref name="reg"/> found in
    /// <paramref name="str"/>, then appends "(c)", where c is the single
    /// character that follows the first "(" in <paramref name="str"/>.
    /// </summary>
    /// <param name="reg">Regular expression selecting the characters to keep.</param>
    /// <param name="str">Text to scan.</param>
    /// <returns>
    /// The concatenated matches followed by the parenthesised character. When
    /// <paramref name="str"/> has no "(" or nothing follows it, only the
    /// concatenated matches are returned.
    /// </returns>
    /// <exception cref="ArgumentNullException">reg or str is null.</exception>
    static string GetChinese(string reg, string str)
    {
        if (reg == null)
        {
            throw new ArgumentNullException("reg");
        }
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }

        Regex r = new Regex(reg, RegexOptions.IgnoreCase);

        StringBuilder matched = new StringBuilder();
        foreach (Match match in r.Matches(str))
        {
            matched.Append(match.Value);
        }

        int paren = str.IndexOf('(');
        if (paren < 0 || paren + 1 >= str.Length)
        {
            // No usable "(x": previously this either threw or used the first
            // character of str as a bogus suffix.
            return matched.ToString();
        }

        // NOTE(review): only one character after "(" is kept, so a multi-character
        // value such as "(12)" is reduced to "(1)" - confirm that is intended.
        return matched.ToString() + "(" + str[paren + 1] + ")";
    }


        }

    }

  • 相关阅读:
    Hyper-v: Snapshot merge
    解决Visual Studio 2010 “无法导入以下密钥文件” 错误
    Wix使用整理(二)
    Wix使用整理(一)
    C# 打开指定目录并定位到文件
    常用dos命令
    使用IE9、FireFox与Chrome浏览WPF Browser Application(.XBAP)的方式
    .NET Versioning and Multi-Targeting
    WPF-命令
    在WPF中显示动态GIF
  • 原文地址:https://www.cnblogs.com/wang123/p/602777.html
Copyright © 2020-2023  润新知