1. https://blog.csdn.net/qq_24076135/article/details/78045034
2. http://www.vogella.com/tutorials/JavaRegularExpressions/article.html#java-regex-examples
3. https://www.w3cschool.cn/java/java-regex-character-classes.html
4. 提取文档内容:
package com.happySpider;
import java.io.*;
import java.net.*;
/**
 * Minimal page fetcher: downloads the text served at a fixed URL, echoes it
 * to standard output and saves a copy to a timestamp-named ".doc" file under
 * a fixed output directory.
 */
public class Main {
    /**
     * Fetches the target URL line by line, printing every line to standard
     * output and writing it to a new file in the output directory.
     *
     * <p>Any {@link IOException} raised while connecting, reading or writing
     * is reported on standard error via its stack trace; the reader and the
     * writer are closed in every case.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String urlTarget = "http://yun.52tencent.com:808/api/simple/nuomi/eat/meishi/2";
        String happyOutputPath = "D:/happySpider/";

        // Create the output directory if it is missing. The result is deliberately
        // not checked here: if creation failed, opening the output file below
        // throws FileNotFoundException and is reported by the catch block.
        new File(happyOutputPath).mkdirs();

        // The millisecond timestamp gives each run its own file name
        // (two runs started within the same millisecond would still collide).
        File happyTargetFile = new File(happyOutputPath + System.currentTimeMillis() + ".doc");

        try {
            URL happyUrl = new URL(urlTarget);
            URLConnection happyConnect = happyUrl.openConnection();

            // try-with-resources closes both the network reader and the file
            // writer even when reading or writing fails part-way through.
            try (BufferedReader happyBuffer = new BufferedReader(
                         new InputStreamReader(happyConnect.getInputStream(), "UTF-8"));
                 // Write with the same charset the response is decoded with, so
                 // non-ASCII text is not mangled by the platform default charset.
                 PrintWriter happyOutputFile = new PrintWriter(happyTargetFile, "UTF-8")) {

                String happyLine;
                while ((happyLine = happyBuffer.readLine()) != null) {
                    System.out.println(happyLine);
                    happyOutputFile.println(happyLine);
                }

                // PrintWriter never throws on write errors; it only records them.
                // Surface a failed write instead of silently leaving a truncated file.
                if (happyOutputFile.checkError()) {
                    throw new IOException("Failed to write " + happyTargetFile);
                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}