• 下载生物信息


    http://www.catalogueoflife.org/col/browse/classification

    这是一个国外的生物信息网站

    今天的代码可以抓取指定分类的信息(id,学名)

    没有把多线程写进去,略失败...

    运用:WebClient、Regex、System.IO

    项目在>>>开源中国

     1 using System;
     2 using System.Collections.Generic;
     3 using System.ComponentModel;
     4 using System.Data;
     5 using System.Drawing;
     6 using System.Linq;
     7 using System.Text;
     8 using System.Threading.Tasks;
     9 using System.Windows.Forms;
    10 using System.Net;
    11 using System.Text.RegularExpressions;
    12 using System.Threading;
    13 using System.IO;
    14 namespace cateoflife
    15 {
    /// <summary>
    /// Form that downloads a numbered range of pages, extracts every match of a
    /// user-supplied regular expression from each page, and appends one
    /// tab-separated record per match to a text file named after the first page number.
    /// </summary>
    public partial class Form1 : Form
    {
        // Shared client used for every page download; its encoding is set to UTF-8 per run.
        WebClient wc = new WebClient();
        // First page number (from textBox2).
        int start;
        // Last page number (from textBox3); an input of 0 is replaced by 99999.
        int end;
        // Address prefix (from textBox1); the page number is appended to it.
        string url;
        // Regular expression (from textBox4) applied to each downloaded page.
        string reg;
        // Most recent record produced by gettxt.
        string msg;
        // Not read anywhere in this part of the partial class; kept in case the
        // other part uses it.
        int now = 1;

        /// <summary>
        /// Creates the form and initializes its designer-generated components.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Reads the range, address prefix and pattern from the text boxes, then
        /// downloads each page in the range and appends the extracted records to
        /// "&lt;start&gt;.txt".
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <exception cref="WebException">
        /// A page could not be downloaded even after one retry.
        /// </exception>
        private void button1_Click(object sender, EventArgs e)
        {
            int first;
            int last;
            // Reject non-numeric input up front instead of letting int.Parse throw.
            if (!int.TryParse(textBox2.Text, out first) || !int.TryParse(textBox3.Text, out last))
            {
                MessageBox.Show("Start and end must be whole numbers.");
                return;
            }

            start = first;
            // 0 means "no explicit upper bound"; 99999 is used as the cap.
            end = (last == 0) ? 99999 : last;
            url = textBox1.Text;
            reg = textBox4.Text;
            wc.Encoding = Encoding.UTF8;

            // Open in append mode; 'using' guarantees the file is closed even when
            // a download or the pattern throws part-way through the range.
            using (StreamWriter w = new StreamWriter(start + ".txt", true))
            {
                for (int i = start; i <= end; i++)
                {
                    // The download (with its retry) completes before anything is
                    // written, so a retry can never duplicate records in the file.
                    string htm = DownloadWithRetry(url + i);
                    foreach (Match m in Regex.Matches(htm, reg))
                    {
                        gettxt(m.ToString());
                        w.Write(msg);
                    }

                    // Push each finished page to disk so progress survives a later failure.
                    w.Flush();
                }
            }
        }

        /// <summary>
        /// Downloads <paramref name="address"/> as a string, retrying once if the
        /// first attempt fails with a <see cref="WebException"/>.
        /// </summary>
        /// <param name="address">Full address of the page to download.</param>
        /// <returns>The page content.</returns>
        /// <exception cref="WebException">The retry failed as well.</exception>
        private string DownloadWithRetry(string address)
        {
            try
            {
                return wc.DownloadString(address);
            }
            catch (WebException)
            {
                // Single retry; a second failure propagates to the caller.
                return wc.DownloadString(address);
            }
        }

        /// <summary>
        /// Builds one output record from a matched HTML fragment and stores it in
        /// <c>msg</c>: the first run of digits that directly follows a '/', a tab,
        /// the first "word, optional whitespace, word" sequence that directly
        /// follows a '>', and a line feed.
        /// </summary>
        /// <param name="html">HTML fragment matched by the user-supplied pattern.</param>
        void gettxt(string html)
        {
            // Verbatim strings keep the regex escapes (\d, \w, \s) intact.
            string id = Regex.Match(html, @"(?<=/)\d+").ToString();
            string name = Regex.Match(html, @"(?<=>)\w+\s*\w+").ToString();
            msg = id + "\t" + name + "\n";
        }
    }
    76 }
  • 相关阅读:
    (转)mtr命令详解诊断网络路由
    WinDbg使用介绍
    windbg-bp、 bm、 bu、 bl、 bc、 ba(断点、硬件断点)
    【转】25.windbg-!gle、g(错误码、g系列)
    umdh windbg分析内存泄露
    windbg !logexts(自带的监控API)
    windbg cs
    windbg dds、dps、dqs
    Windbg找出memory leak的一种笨办法
    【转】windows平台多线程同步之Mutex的应用
  • 原文地址:https://www.cnblogs.com/Fadinglemon/p/3737058.html
Copyright © 2020-2023  润新知