目的:本文介绍如何使用 Java + Selenium 爬取58同城招聘页,并将指定职位的招聘公司名称保存到本地 Excel 文件中。
一、首先创建一个 Maven 工程,并配置依赖包
<dependencies>

    <!-- selenium-java: WebDriver API used to drive the browser -->
    <dependency>
        <groupId>org.seleniumhq.selenium</groupId>
        <artifactId>selenium-java</artifactId>
        <version>3.4.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>3.9</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
    <!-- Provides the XSSF (.xlsx) classes; keep its version identical to poi. -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>3.9</version>
    </dependency>
</dependencies>
二、开始编写自动化测试代码
1 import org.apache.poi.xssf.usermodel.XSSFSheet; 2 import org.apache.poi.xssf.usermodel.XSSFWorkbook; 3 import org.openqa.selenium.By; 4 import org.openqa.selenium.WebDriver; 5 import org.openqa.selenium.WebElement; 6 import org.openqa.selenium.chrome.ChromeDriver; 7 import java.io.FileInputStream; 8 import java.io.FileOutputStream; 9 import java.io.IOException; 10 import java.util.concurrent.TimeUnit; 11 12 13 public class Zhaopin { 14 public static void main(String[] args) throws InterruptedException, IOException { 15 System.setProperty("webdriver.chrome.driver", "C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"); 16 WebDriver driver = new ChromeDriver(); 17 driver.manage().window().maximize(); //窗口最大化 18 driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS); 19 driver.get("http://www.58.com/"); //输入网址 20 21 driver.findElement(By.xpath("//*[@id="commonTopbar_ipconfig"]/a[1]")).click(); 22 WebElement input = driver.findElement(By.xpath("//*[@id="selector-search-input"]")); 23 input.sendKeys("深圳"); //切换城市,打开默认是本地 24 25 driver.findElement(By.xpath("//*[@id="selector-search-btn"]")).click(); 26 driver.findElement(By.xpath("/html/body/div[3]/div[1]/div[1]/div/div[3]/div[1]/h2/a")).click();//打开招聘 27 Thread.sleep(1000); 28 String SecondtHandle = driver.getWindowHandle(); //首先得到最先的窗口 权柄 29 for (String winHandle1 : driver.getWindowHandles()) { //得到浏览器所有窗口的权柄为Set集合,遍历 30 if (!winHandle1.equals(SecondtHandle)) { //如果为 最先的窗口 权柄跳出 31 driver.close(); 32 driver.switchTo().window(winHandle1); //如果不为 最先的窗口 权柄,将 新窗口的操作权柄 给 driver 33 System.out.println(driver.getCurrentUrl()); //打印是否为新窗口 34 } 35 } 36 37 38 FileInputStream fis = new FileInputStream("D:\Desktop\test.xlsx");//创建输入流,获取本地文件 39 XSSFWorkbook workbook=new XSSFWorkbook(fis); //创建工作簿,将数据读入到workbook中 40 XSSFSheet sheet1 = workbook.getSheet("Sheet1"); 41 String index= sheet1.getRow(0).getCell(0).getStringCellValue(); //读取文件内容,为下文做索引 42 43 WebElement input1 = 
driver.findElement(By.xpath("//*[@id="keyword1"]")); 44 input1.sendKeys(index); 45 driver.findElement(By.xpath("//*[@id="searJob"]/strong")).click(); //搜索关键词 46 47 for (int i=1;i<100;i++){ 48 String name=driver.findElement(By.xpath("//*[@id="list_con"]/li["+i+"]/div[2]/div/a")).getAttribute("title"); 49 //查找每一条记录的title值 50 sheet1.createRow(i).createCell(0).setCellValue(name); 51 } //遍历该页面所有公司名并写入excel 52 53 sheet1.createRow(0).createCell(0).setCellValue(index); 54 FileOutputStream os = new FileOutputStream("D:\Desktop\2.xlsx");//创建一个向指定位置写入文件的输出流 55 workbook.write(os);//向指定的文件写入excel 56 os.close();//关闭流 57 driver.close();//关闭浏览器 58 } 59 }
三、运行结果:程序读取本地 test.xlsx 文件的内容作为搜索条件,并将搜索到的公司名称写入 2.xlsx 文件