// Downloads the resource at the given URL and returns its text content.
//
// sUrl:    address to request; passed unchanged to WebRequest.Create.
// returns: every line of the response body, concatenated in order WITHOUT
//          line terminators (ReadLine strips them and none are re-added).
//
// Exceptions raised by WebRequest.Create / GetResponse (malformed URL,
// network failure, HTTP error status) are not caught here and propagate
// to the caller.
private string GetWebContent(string sUrl)
{
    // Fully qualified so the block does not depend on a "using System.Text;" directive.
    System.Text.StringBuilder pageText = new System.Text.StringBuilder();

    WebRequest request = WebRequest.Create(sUrl);

    // The response, its stream and the reader all hold the underlying connection;
    // dispose them deterministically so repeated calls do not exhaust connections.
    using (WebResponse response = request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseStream))
    {
        // Read-then-test loop: every line, including the very first one,
        // is appended before the next one is fetched.
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            pageText.Append(line);
        }
    }

    return pageText.ToString();
}
// Returns the part of src that lies between two literal marker strings.
//
// src:      text to search. A null or empty value yields an empty string.
// startstr: literal start marker. The result begins right after the LAST
//           occurrence of this marker; if it does not occur, the result
//           begins at the start of src.
// stopstr:  literal stop marker. The result ends right before the LAST
//           occurrence of this marker anywhere in src. The special value
//           "结束符" ("end marker") means "run to the end of src".
// returns:  the text between the two positions, or an empty string when the
//           stop position is missing or lies before the start position.
// throws:   ArgumentNullException when startstr or stopstr is null.
private string SplitStr(string src, string startstr, string stopstr)
{
    if (startstr == null)
    {
        throw new ArgumentNullException("startstr");
    }
    if (stopstr == null)
    {
        throw new ArgumentNullException("stopstr");
    }
    if (string.IsNullOrEmpty(src))
    {
        return string.Empty;
    }

    // The markers are literal text (the offset below uses startstr.Length),
    // so escape them: otherwise characters such as '(', '.', or '$' would be
    // interpreted as regex syntax and either throw or match the wrong span.
    int startpos = 0;
    MatchCollection startMatches = Regex.Matches(src, Regex.Escape(startstr), RegexOptions.None);
    if (startMatches.Count > 0)
    {
        startpos = startMatches[startMatches.Count - 1].Index + startstr.Length;
    }

    int stoppos;
    if (stopstr == "结束符")
    {
        // Sentinel: take everything up to the end of the text.
        stoppos = src.Length;
    }
    else
    {
        MatchCollection stopMatches = Regex.Matches(src, Regex.Escape(stopstr), RegexOptions.None);
        stoppos = 0;
        if (stopMatches.Count > 0)
        {
            stoppos = stopMatches[stopMatches.Count - 1].Index;
        }

        // A stop marker that is absent or precedes the start yields an empty result.
        if (stoppos < startpos)
        {
            stoppos = startpos;
        }
    }

    return src.Substring(startpos, stoppos - startpos);
}
// Click handler: fetches the detail pages with Id = 1..10, extracts the
// labelled fields from each page's visible text, and appends one
// comma-separated row per page to textBox2.
//
// sender, e: standard event-handler arguments; neither is used.
private void button1_Click(object sender, EventArgs e)
{
    // Timestamp shown to the user. "MM" is the month and "HH" the 24-hour
    // clock; lowercase "mm" would repeat the minutes in the month position
    // and "hh" would give an ambiguous 12-hour value.
    string date1 = DateTime.Now.ToString("yyMMddHHmmss");
    MessageBox.Show(date1);

    // Field labels in the order they are searched for. The value of each
    // field is the text between its own label and the next entry in this
    // list; the final entry is the sentinel SplitStr treats as "end of text".
    string[] labels = new string[]
    {
        "生产场所",             // production site
        "变更日期",             // change date
        "备注",                 // remarks
        "注册号",               // registration number
        "生产单位",             // manufacturer
        "地址",                 // address
        "邮编",                 // postal code
        "产品名称",             // product name
        "产品标准",             // product standard
        "产品性能结构及组成",   // product performance, structure and composition
        "有效期",               // validity period
        "批准日期",             // approval date
        "产品适用范围",         // scope of application
        "规格型号",             // specification / model
        "结束符"                // sentinel: end of text
    };

    // WebBrowser is a disposable control; release it when the handler is done
    // instead of leaking one instance per click.
    using (WebBrowser webfda = new WebBrowser())
    {
        for (int j = 1; j <= 10; j++)
        {
            // URL of the page to scrape.
            string Url = "http://app1.sfda.gov.cn/datasearch/face3/content.jsp?tableId=26&tableName=TABLE26&tableView=%E5%9B%BD%E4%BA%A7%E5%99%A8%E6%A2%B0&Id=" + j.ToString();

            // Fetch the source of that URL.
            string strWebContent = GetWebContent(Url);
            label1.Text = j.ToString();

            // Load the markup into an HtmlDocument so its text can be read without tags.
            webfda.Navigate("about:blank");
            HtmlDocument htmldoc = webfda.Document.OpenNew(true);
            htmldoc.Write(strWebContent);

            // A document without a body (or with an empty one) is treated as empty text.
            HtmlElement body = htmldoc.Body;
            string src = (body != null && body.InnerText != null) ? body.InnerText : "";

            // One extracted value per consecutive pair of labels.
            string[] fields = new string[labels.Length - 1];
            for (int k = 0; k < fields.Length; k++)
            {
                fields[k] = SplitStr(src, labels[k], labels[k + 1]);
            }
            string desc = string.Join(",", fields) + "\n";

            label2.Text = j.ToString();
            textBox2.Text = textBox2.Text + desc;
        }
    }
}