• 发现的一个好的socket网页抓取源码


    C# 使用 Socket 获取网页源代码的示例,需要的朋友可以参考一下。

    WebToolkit类:
    using System;
    using System.Net.Sockets;
    using System.Text;

    namespace ConsoleApplication1
    {
    /// <summary>
    /// Minimal HTTP client built directly on a TCP <see cref="Socket"/>.
    /// Sends a hand-written GET or POST request and returns everything the
    /// server sends back (status line, headers and body) as one decoded string.
    /// Only plain "http://" URLs are understood.
    /// </summary>
    class WebToolkit
    {
        /// <summary>Scheme prefix a URL must start with to be parsed.</summary>
        private const string HttpScheme = "http://";

        /// <summary>Port used when the URL carries no valid explicit port.</summary>
        private const int DefaultHttpPort = 80;

        /// <summary>Largest valid TCP port number.</summary>
        private const int MaxPort = 65535;

        /// <summary>
        /// Encoding used to serialize the outgoing request. UTF-8 is
        /// byte-identical to ASCII for ASCII text but does not replace other
        /// characters with '?'. Content-Length is computed with this same
        /// encoding so the header always matches the bytes actually sent.
        /// </summary>
        private static readonly Encoding RequestEncoding = Encoding.UTF8;

        /// <summary>
        /// Components of a parsed URL.
        /// </summary>
        struct UrlInfo
        {
            /// <summary>Host name without port; empty when the URL could not be parsed.</summary>
            public string Host;

            /// <summary>TCP port to connect to.</summary>
            public int Port;

            /// <summary>Path component, always starting with "/".</summary>
            public string File;

            /// <summary>
            /// Text after the first '?' (without the '?'). Used as the query
            /// string by <see cref="Get"/> and as the request payload by
            /// <see cref="Post"/>.
            /// </summary>
            public string Body;
        }

        /// <summary>
        /// Splits an "http://" URL into host, port, path and query.
        /// </summary>
        /// <param name="url">URL to parse; must not be null.</param>
        /// <returns>
        /// The parsed components. When <paramref name="url"/> does not start
        /// with "http://" (case-insensitive, surrounding whitespace ignored)
        /// the result has an empty Host, port 80, path "/" and an empty Body.
        /// </returns>
        private static UrlInfo ParseURL(string url)
        {
            UrlInfo urlInfo = new UrlInfo();
            urlInfo.Host = "";
            urlInfo.Port = DefaultHttpPort;
            urlInfo.File = "/";
            urlInfo.Body = "";

            url = url.Trim();

            // The scheme must be a prefix; finding it somewhere in the middle
            // of the string does not make the input an HTTP URL.
            if (!url.StartsWith(HttpScheme, StringComparison.OrdinalIgnoreCase))
            {
                return urlInfo;
            }

            string rest = url.Substring(HttpScheme.Length);

            // The authority (host[:port]) ends at the first '/' or '?',
            // whichever comes first, or at the end of the string.
            int authorityEnd = rest.IndexOfAny(new char[] { '/', '?' });
            string authority = authorityEnd == -1 ? rest : rest.Substring(0, authorityEnd);
            string pathAndQuery = authorityEnd == -1 ? "" : rest.Substring(authorityEnd);

            int colonIndex = authority.IndexOf(':');
            if (colonIndex == -1)
            {
                urlInfo.Host = authority;
            }
            else
            {
                urlInfo.Host = authority.Substring(0, colonIndex);

                // Parse into a temporary: int.TryParse writes 0 to its out
                // argument on failure, which would clobber the default port.
                int parsedPort;
                if (int.TryParse(authority.Substring(colonIndex + 1), out parsedPort)
                    && parsedPort >= 1
                    && parsedPort <= MaxPort)
                {
                    urlInfo.Port = parsedPort;
                }
            }

            // Split on the FIRST '?' only, so a query that itself contains
            // '?' is kept intact.
            int queryIndex = pathAndQuery.IndexOf('?');
            if (queryIndex == -1)
            {
                if (pathAndQuery.Length > 0)
                {
                    urlInfo.File = pathAndQuery;
                }
            }
            else
            {
                if (queryIndex > 0)
                {
                    urlInfo.File = pathAndQuery.Substring(0, queryIndex);
                }
                urlInfo.Body = pathAndQuery.Substring(queryIndex + 1);
            }

            return urlInfo;
        }

        /// <summary>
        /// Connects to the host, sends the raw request text and reads the
        /// response until the peer closes the connection.
        /// </summary>
        /// <param name="host">Host name to connect to; an empty host yields an empty result.</param>
        /// <param name="port">TCP port to connect to.</param>
        /// <param name="body">Complete raw HTTP request (request line, headers and payload).</param>
        /// <param name="encode">Encoding used to decode the response bytes.</param>
        /// <returns>
        /// The decoded raw response. Network failures are not reported to the
        /// caller: whatever was received before the failure (possibly an
        /// empty string) is returned.
        /// </returns>
        private static string GetResponse(string host, int port, string body, Encoding encode)
        {
            // Nothing sensible to connect to (URL was not a parseable HTTP URL).
            if (string.IsNullOrEmpty(host))
            {
                return string.Empty;
            }

            byte[] requestBytes = RequestEncoding.GetBytes(body);
            byte[] buffer = new byte[4096];

            // A stateful Decoder keeps the trailing bytes of a multi-byte
            // character that was split across two Receive calls, instead of
            // decoding each chunk in isolation and corrupting that character.
            Decoder decoder = encode.GetDecoder();
            char[] chars = new char[encode.GetMaxCharCount(buffer.Length)];
            StringBuilder response = new StringBuilder();

            using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
            {
                try
                {
                    socket.Connect(host, port);
                    socket.Send(requestBytes, requestBytes.Length, SocketFlags.None);

                    int received;
                    while ((received = socket.Receive(buffer, buffer.Length, SocketFlags.None)) > 0)
                    {
                        int charCount = decoder.GetChars(buffer, 0, received, chars, 0);
                        response.Append(chars, 0, charCount);
                    }

                    // Flush so a truncated trailing sequence is emitted per
                    // the encoding's fallback rather than silently dropped.
                    int tailCount = decoder.GetChars(buffer, 0, 0, chars, 0, true);
                    response.Append(chars, 0, tailCount);
                }
                catch (SocketException)
                {
                    // Best effort by design: on a DNS, connect, send or
                    // receive failure return what has been read so far.
                }
            }

            return response.ToString();
        }

        /// <summary>
        /// Issues an HTTP GET for the given URL.
        /// </summary>
        /// <param name="url">Absolute "http://" URL, optionally with port and query string.</param>
        /// <param name="encode">Encoding used to decode the response bytes.</param>
        /// <returns>
        /// The raw response text (status line, headers and body), or an empty
        /// string when the URL is not an HTTP URL or the request fails.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="url"/> or <paramref name="encode"/> is null.
        /// </exception>
        public static string Get(string url, Encoding encode)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }
            if (encode == null)
            {
                throw new ArgumentNullException("encode");
            }

            UrlInfo urlInfo = ParseURL(url);

            // Append "?query" only when there is a query; a bare trailing
            // '?' changes the request target.
            string target = urlInfo.Body.Length == 0
                ? urlInfo.File
                : urlInfo.File + "?" + urlInfo.Body;

            string strRequest = string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "GET {0} HTTP/1.1\r\nHost:{1}:{2}\r\nConnection:Close\r\n\r\n",
                target,
                urlInfo.Host,
                urlInfo.Port);
            return GetResponse(urlInfo.Host, urlInfo.Port, strRequest, encode);
        }

        /// <summary>
        /// Issues an HTTP POST to the path of the given URL, sending the
        /// URL's query string as an application/x-www-form-urlencoded payload.
        /// </summary>
        /// <param name="url">Absolute "http://" URL; the part after '?' becomes the payload.</param>
        /// <param name="encode">Encoding used to decode the response bytes.</param>
        /// <returns>
        /// The raw response text (status line, headers and body), or an empty
        /// string when the URL is not an HTTP URL or the request fails.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="url"/> or <paramref name="encode"/> is null.
        /// </exception>
        public static string Post(string url, Encoding encode)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }
            if (encode == null)
            {
                throw new ArgumentNullException("encode");
            }

            UrlInfo urlInfo = ParseURL(url);

            // Content-Length counts bytes, not characters, so measure the
            // payload with the encoding it is actually sent in.
            int contentLength = RequestEncoding.GetByteCount(urlInfo.Body);

            string strRequest = string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "POST {0} HTTP/1.1\r\nHost:{1}:{2}\r\nContent-Length:{3}\r\nContent-Type:application/x-www-form-urlencoded\r\nConnection:Close\r\n\r\n{4}",
                urlInfo.File,
                urlInfo.Host,
                urlInfo.Port,
                contentLength,
                urlInfo.Body);
            return GetResponse(urlInfo.Host, urlInfo.Port, strRequest, encode);
        }
    }
    }

    调用示例:

    using System;
    using System.Text;

    namespace ConsoleApplication1
    {
    // Usage example
    class Program
    {
        /// <summary>
        /// Fetches a sample page with <c>WebToolkit.Get</c>, prints the
        /// returned text to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            string pageText = WebToolkit.Get("http://www.jb51.net/t.asp?keyword=vbscript", Encoding.Default);
            Console.WriteLine(pageText);

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }
    }
    }

    详细出处参考:http://www.jb51.net/article/25715.htm

  • 相关阅读:
    批处理集锦——(5)使用dir查找文件
    批处理集锦——(4)2>nul和1>nul是什么意思?
    python3循环遍历嵌套字典替换指定值
    selenium对浏览器自动截图
    linux 安装mysql8以及远程连接步骤(图文并茂)
    Allure 自动化测试报告使用详解
    allure安装教程以及遇到的坑
    pytest接口自动化快速设置接口全局host
    pytest报错警告处理一:DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working
    python3.x中 pytest之fixture
  • 原文地址:https://www.cnblogs.com/lijinchang/p/2207021.html
Copyright © 2020-2023  润新知