EmailSpider:
public void getSpiderForURL(URL url){ HttpURLConnection conn = url.openConnection(); //创建一个HttpURLConnection对象,用来操作URL中的内容 int code = conn.getResponseCode();//获得服务器返回的状态码 if(code==HttpURLConnection.HTTP_OK){ InputStream is = conn.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); String line =""; while((line = br.readLine())){ parse(line); } } } private void parse(String line){ String regStr = "[\w[.-]]+@[\w&&[^_][.-]]+\.\w+";//正则表达式 Pattern p = Pattern.compile(regStr,Pattern.CASE_INSENSITIVE);//将正则表达式添加到编译器中 Matcher m = p.matcher(line);//在匹配器中将字符和编译器里面的正则表达式匹配 m.matches(); while(m.find()){ System.out.println(m.group()); } }
EmailSpiderTest:
// Address of the page that is scanned for e-mail addresses.
final String pageAddress = "http://blog.sina.com.cn/s/blog_515617e60101e151.html";
EmailSpider spider = new EmailSpider();
// Hand the page to the spider.
spider.getSpiderForURL(new URL(pageAddress));