• F# Grab Web page


    open System
    open System.Diagnostics
    open System.Net
    open System.Xml
    open System.IO
    //open HtmlAgilityPack   
    /// Downloads `newUrl` over HTTP and saves the response text to a file on drive D:,
    /// blocking the caller until the download has finished.
    ///
    /// The file name is the URL with '.', '/' and ':' (plus any other character that is
    /// not allowed in a file name) replaced by '0', followed by ".xml".
    /// The response is read with StreamReader's default encoding and re-written line by
    /// line as GB2312, which is the encoding `main` uses to read the file back.
    ///
    /// Returns the full path of the file that was written.
    let asyncGrapUrl(newUrl : string) =
        async{
            let baseName = newUrl.Replace('.','0').Replace('/','0').Replace(':','0')
            // Characters such as '?' in a query string would make FileStream throw,
            // so every remaining invalid file-name character is mapped to '0' as well.
            let safeName =
                Path.GetInvalidFileNameChars()
                |> Array.fold (fun (name : string) (c : char) -> name.Replace(c, '0')) baseName
            let fileName = @"D:\" + safeName + ".xml"

            let httpRequest = HttpWebRequest.Create(newUrl) :?> HttpWebRequest
            // `use` / `use!` guarantee that every handle is released even when the
            // download or the file write fails half-way through.
            use! httpRespon = Async.AwaitTask(httpRequest.GetResponseAsync())
            use responStream = httpRespon.GetResponseStream()
            use strd = new StreamReader(responStream)

            // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
            // bytes behind whenever the new content is shorter than the old one.
            use fileStream = new System.IO.FileStream(fileName,FileMode.Create,FileAccess.Write)
            use streamWr = new StreamWriter(fileStream,Text.Encoding.GetEncoding("GB2312"))

            while(not strd.EndOfStream ) do
                streamWr.WriteLine(strd.ReadLine())

            // Disposing streamWr at the end of this scope flushes the buffered text
            // before the async result is handed back to the caller.
            return fileName
        } |> Async.RunSynchronously
         
    /// Downloads the BBC science/technology RSS feed, prints the numbered item titles,
    /// reads an item number from the console, opens that item's link with the system
    /// shell and saves the linked page to disk via `asyncGrapUrl`.
    ///
    /// Invalid input (non-numeric, out of range, end of input) and items without a
    /// usable http/https link are reported on the console instead of raising.
    let main() =
        let url = @"http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/sci/tech/rss.xml"
        let filename = asyncGrapUrl(url)

        // Load the saved feed; the file handles are released as soon as parsing is done.
        let loadFeed (path : string) =
            use fileInfo = new System.IO.FileStream(path,FileMode.Open,FileAccess.Read)
            use fileStr = new StreamReader(fileInfo,Text.Encoding.GetEncoding("GB2312"))
            let doc = new XmlDocument()
            doc.Load(fileStr)
            doc
        let xml = loadFeed filename

        let nodes = xml.SelectNodes("/rss/channel/item/title")

        for i in 0..(nodes.Count - 1) do
            printfn "%d : %s" (i + 1) nodes.[i].InnerText

        // Opens the link in the default handler and then saves the page to disk.
        let openAndGrab (newUrl : string) =
            let proStart = new ProcessStartInfo(UseShellExecute=true,FileName=newUrl)
            use proc = new Process()
            proc.StartInfo <- proStart
            proc.Start() |> ignore
            asyncGrapUrl(newUrl) |> ignore

        // Resolves the 1-based item number to its link and validates it before use.
        let handleItem (item : int) =
            let xpath = sprintf "/rss/channel/item[%i]/link" item
            let node = xml.SelectSingleNode(xpath)
            // SelectSingleNode yields null when the XPath matches nothing.
            if obj.ReferenceEquals(node, null) then
                printfn "Item %d has no link." item
            else
                let newUrl = node.InnerText.Trim()
                // The link comes from a remote feed and is handed to the shell, so only
                // absolute http/https URLs are accepted.
                match Uri.TryCreate(newUrl, UriKind.Absolute) with
                | true, uri when uri.Scheme = Uri.UriSchemeHttp || uri.Scheme = Uri.UriSchemeHttps ->
                    openAndGrab newUrl
                | _ ->
                    printfn "Item %d does not have a valid http/https link." item

        // Console.ReadLine returns null at end of input; TryParse rejects that too.
        match Int32.TryParse(Console.ReadLine()) with
        | true, item when item >= 1 && item <= nodes.Count -> handleItem item
        | _ -> printfn "Please enter a number between 1 and %d." nodes.Count

    main()

    目前还不完善:在英文系统下中文会出现乱码;另外尚未实现正文抓取,只是获取了页面的全部源代码。

  • 相关阅读:
    Silverlight4 GDR3与Silverlight5 EAP1的变化
    使用微软WPF技术开发产品优势究竟在那里
    于娟——《活着就是王道》博客精华文摘
    Silverlight中开发和设计人员的合作
    ubuntu10.10编译内核步骤
    添加系统调用实验步骤
    SinaWeiboSdk c++test
    【转】windows7下硬盘安装linux,双系统共存
    cppunit在vs2008下使用的环境搭建(上)
    【转】RedHat Linux 5 安装 OpenOffice 3.2.0
  • 原文地址:https://www.cnblogs.com/FsharpZack/p/2844647.html
Copyright © 2020-2023  润新知