• mini爬虫程序


    Code
    /// <summary>
    /// A minimal interactive web crawler: downloads a page, lists the absolute
    /// http/https links found in it one at a time, and lets the user follow a
    /// link, ask for the next one, or quit.
    /// </summary>
    class MiniCrawler
    {
        /// <summary>
        /// Finds the next <c>href="http...</c> link in a block of HTML.
        /// </summary>
        /// <param name="htmlstr">The HTML text to search.</param>
        /// <param name="startloc">
        /// Index at which to begin searching. When a link is found it is
        /// advanced to the index of the link's closing quote so the next call
        /// continues after this link; otherwise it is left unchanged.
        /// </param>
        /// <returns>
        /// The link target (the text between the double quotes), or
        /// <c>null</c> when no further complete link exists.
        /// </returns>
        static string FindLink(string htmlstr, ref int startloc)
        {
            // Nothing to search, or the caller's position is outside the text.
            if (string.IsNullOrEmpty(htmlstr) || startloc < 0 || startloc >= htmlstr.Length)
            {
                return null;
            }

            // Case-insensitive ordinal search on the original string keeps the
            // indices valid and avoids allocating a lower-cased copy.
            int hrefPos = htmlstr.IndexOf("href=\"http", startloc, StringComparison.OrdinalIgnoreCase);
            if (hrefPos == -1)
            {
                return null;
            }

            // The link is the text between the opening and closing quotes.
            int start = htmlstr.IndexOf('"', hrefPos) + 1;
            int end = htmlstr.IndexOf('"', start);
            if (end == -1)
            {
                // Unterminated attribute (truncated or malformed HTML):
                // report "no link" instead of throwing from Substring.
                return null;
            }

            startloc = end;
            return htmlstr.Substring(start, end - start);
        }

        /// <summary>
        /// Interactively crawls starting from <paramref name="uristr"/>.
        /// For each page, every link found is shown and the user is asked to
        /// choose: <c>L</c> follows the link, <c>M</c> shows the next link,
        /// <c>Q</c> (or end of input) quits. Crawling also stops when a page
        /// has no more links. Network, protocol, URI-format, unsupported-scheme
        /// and I/O errors are reported on the console and end the crawl.
        /// </summary>
        /// <param name="uristr">The absolute URI of the first page to fetch.</param>
        public static void Crawle(string uristr)
        {
            try
            {
                do
                {
                    Console.WriteLine("Linking to " + uristr);

                    // Create a WebRequest for the specified URI.
                    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uristr);

                    // Send the request and read the whole page. The using
                    // blocks release the connection even if reading fails.
                    string page;
                    using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
                    using (Stream istrm = resp.GetResponseStream())
                    using (StreamReader rdr = new StreamReader(istrm))
                    {
                        page = rdr.ReadToEnd();
                    }

                    // Stays null unless the user chooses to follow a link,
                    // which is what ends the outer loop on Quit / no links.
                    string next = null;
                    int curloc = 0; // current search position within the page

                    while (true)
                    {
                        // Find the next URI in the page.
                        string link = FindLink(page, ref curloc);
                        if (link == null)
                        {
                            Console.WriteLine("No link found.");
                            break;
                        }

                        Console.WriteLine("发现链接: " + link);
                        Console.Write("Link, More, Quit?");
                        string answer = Console.ReadLine();

                        // A null answer means the input stream has ended;
                        // treat it like Quit rather than prompting forever.
                        if (answer == null || string.Equals(answer, "Q", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }

                        if (string.Equals(answer, "L", StringComparison.OrdinalIgnoreCase))
                        {
                            next = link;
                            break;
                        }

                        if (string.Equals(answer, "M", StringComparison.OrdinalIgnoreCase))
                        {
                            Console.WriteLine("Searching for another link.");
                        }

                        // Any other answer falls through and shows the next link.
                    }

                    uristr = next;
                }
                while (uristr != null);
            }
            catch (WebException exc)
            {
                Console.WriteLine("Network Error: " + exc.Message +
                    "\nStatus code: " + exc.Status);
            }
            catch (ProtocolViolationException exc)
            {
                Console.WriteLine("Protocol Error: " + exc.Message);
            }
            catch (UriFormatException exc)
            {
                Console.WriteLine("URI Format Error: " + exc.Message);
            }
            catch (NotSupportedException exc)
            {
                Console.WriteLine("Unknown Protocol: " + exc.Message);
            }
            catch (IOException exc)
            {
                Console.WriteLine("I/O Error: " + exc.Message);
            }

            Console.WriteLine("Terminating MiniCrawler.");
        }
    }
  • 相关阅读:
    组原——④存储器4
    sdk和api的区别
    转载:直播测试
    生成短链接
    H5调原生
    Android Intent 启动方法和启动Action大全
    ps和top的区别
    安卓知识点
    正则基础之——捕获组(capture group)
    正则基础之——反向引用
  • 原文地址:https://www.cnblogs.com/nuaalfm/p/1410354.html
Copyright © 2020-2023  润新知