大数据认证考试机试D卷
项目需求:
- 1.通过 mapreduce 处理源文件文本内容
- 2.存放数据仓库hive规则查询后存入数据库mysql
- 3.最后通过ssm进行页面展示
源文件内容: (emp.txt)
- 7369,SMITH,CLERK,7902,17-12月-80,800,,20
7499,ALLEN,SALESMAN,7698,20-2月-81,1600,300,30
7521,WARD,SALESMAN,7698,22-2月-81,1250,500,30
7566,JONES,MANAGER,7839,02-4月-81,2975,,20
7654,MARTIN,SALESMAN,7698,28-9月-81,1250,1400,30
7698,BLAKE,MANAGER,7839,01-5月-81,2850,,30
7782,CLARK,MANAGER,7839,09-6月-81,2450,,10
7839,KING,PRESIDENT,,17-11月-81,5000,,10
7844,TURNER,SALESMAN,7698,08-9月-81,1500,0,30
7900,JAMES,CLERK,7698,03-12月-81,950,,30
7902,FORD,ANALYST,7566,03-12月-81,3000,,20
7934,MILLER,CLERK,7782,23-1月-82,1300,,10 - 清洗需求:
- 使用Mapreduce进行数据清洗,只保留日志中的编号,姓名,部门,总工资信息
mapreduce 源码:
- 分三个java类进行清洗
- 分别是 map 阶段的 EmpMapper
- partition(分区)阶段的 EmpPartition
- Main(提交) 的 EmpMain
package com.hp;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/*
* (1)需求: 编号,姓名,部门,总工资信息 创建Hive数据库db_KS
* 编号 名称 职务 组 入职时间 基本工资 奖金 工龄
7369,SMITH,CLERK,7902,17-12月-80,800,,20
7499,ALLEN,SALESMAN,7698,20-2月-81,1600,300,30
0 1 2 3 4 5 6 7
编号 姓名 职务 组 入职时间 基资 奖金 工龄
*/
public class EmpMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)
throws IOException, InterruptedException {
String line[] = value.toString().split(",");
if(line[6].toString().equals("")) {
String date = line[0]+" "+line[1]+" "+line[2]+" "+line[5];
context.write(new Text(date), NullWritable.get());
}else {
String date = line[0]+" "+line[1]+" "+line[2]+" "+(Integer.parseInt(line[5])+Integer.parseInt(line[6]));
context.write(new Text(date), NullWritable.get());
}
}
}
package com.hp;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class EmpPartition extends Partitioner<Text, NullWritable> {
public int getPartition(Text arg0, NullWritable arg1, int arg2) {
if (arg0.toString().split(" ")[2].equals("CLERK")) {
return 0;
} else if (arg0.toString().split(" ")[2].equals("SALESMAN")) {
return 1;
} else if (arg0.toString().split(" ")[2].equals("MANAGER")) {
return 2;
} else if (arg0.toString().split(" ")[2].equals("PRESIDENT")) {
return 3;
} else {
return 4;
}
}
}
package com.hp;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class EmpMain {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem fs =FileSystem.get(conf);
if(fs.exists(new Path(args[1]))) {
fs.delete(new Path(args[1]),true);
}
Job job = Job.getInstance();
job.setJarByClass(EmpMain.class);
job.setMapperClass(EmpMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setPartitionerClass(EmpPartition.class);
job.setNumReduceTasks(5);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
最后: 将包导出为 jar 包,生成到桌面 通过工具传入到linux中,开启hdfs平台,上传源文件,进行解析
解析代码命令:
[root@master MP]# hadoop jar 0611D.jar /mpfile/emp.txt /0611Dresult
接下来我们进行将清洗后的数据存放到数据仓库的阶段 --(hive) 部分
首先我们要先创建 hive 数据库
create database kaoshi;
接下来 创建 数据表 (需要对字段进行分区)
create table emp_backup(
ID string,
name string,
section string,
Grosssalary string
)partitioned by(sections string)row format delimited fields terminated by ' ';
然后 我们 加载 数据到 数据表 也就是(mapreduce解析出来的数据存放到我们刚刚创建的hive表) 注意: 需要进行分区
load data inpath '/0611Dresult/part-r-00000' into table emp_backup partition(sections="CLERK");
load data inpath '/0611Dresult/part-r-00001' into table emp_backup partition(sections="SALESMAN");
load data inpath '/0611Dresult/part-r-00002' into table emp_backup partition(sections="MANAGER");
load data inpath '/0611Dresult/part-r-00003' into table emp_backup partition(sections="PRESIDENT");
load data inpath '/0611Dresult/part-r-00004' into table emp_backup partition(sections="ANALYST");
最后 我们 对 emp_backup 表进行全查,以确保我们数据成功加载
hive> select * from emp_backup;
7902 FORD ANALYST 3000 ANALYST
7900 JAMES CLERK 950 CLERK
7934 MILLER CLERK 1300 CLERK
7566 JONES MANAGER 2975 MANAGER
7698 BLAKE MANAGER 2850 MANAGER
7782 CLARK MANAGER 2450 MANAGER
7839 KING PRESIDENT 5000 PRESIDENT
7499 ALLEN SALESMAN 1900 SALESMAN
7521 WARD SALESMAN 1750 SALESMAN
7654 MARTIN SALESMAN 2650 SALESMAN
7844 TURNER SALESMAN 1500 SALESMAN
然后根据题意看我们是否要进行hive的查询
-
统计出每个部门的总人数,部门总支出,平均工资,部门名称,将此信息存放在emp_new数据表中
create table emp_new row format delimited fields terminated by ' ' as select section,count(id),sum(Grosssalary),avg(Grosssalary) from emp_backup group by section; 查看 新表数据 hive> select * from emp_new; OK ANALYST 1 3000.0 3000.0 CLERK 2 2250.0 1125.0 MANAGER 3 8275.0 2758.3333333333335 PRESIDENT 1 5000.0 5000.0 SALESMAN 4 7800.0 1950.0
-
将 emp_new 数据 导入到 mysql
创建 mysql 表 mysql> create database kaoshi; mysql> use kaoshi; mysql> create table emp_new(section varchar(32),count varchar(32),sumjq varchar(32),avgjq varchar(32));
-
然后通过 sqoop 导入数据
bin/sqoop export --connect jdbc:mysql://localhost:3306/kaoshi --username root --password mysql --table emp_new --export-dir /user/hive/warehouse/kaoshi.db/emp_new --input-fields-terminated-by " ";
-
进行 mysql 全查 验证数据时候成功导入
mysql> select * from emp_new; +-----------+-------+--------+--------------------+ | section | count | sumjq | avgjq | +-----------+-------+--------+--------------------+ | SALESMAN | 4 | 7800.0 | 1950.0 | | ANALYST | 1 | 3000.0 | 3000.0 | | CLERK | 2 | 2250.0 | 1125.0 | | MANAGER | 3 | 8275.0 | 2758.3333333333335 | | PRESIDENT | 1 | 5000.0 | 5000.0 | +-----------+-------+--------+--------------------+ 5 rows in set (0.00 sec)
终于开始进行 SSM 了
创建项目 导入工程jar 自行操作
然后开始写配置文件
第一个 jdbc.properties 连接数据库配置文件
jdbc.driver=com.mysql.jdbc.Driver
jdbc.url=jdbc:mysql://192.168.65.110:3306/kaoshi
jdbc.username=root
jdbc.password=mysql
applicationContext.xml
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context" xmlns:tx="http://www.springframework.org/schema/tx"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd http://www.springframework.org/schema/tx http://www.springframework.org/schema/tx/spring-tx.xsd">
<!-- 告知spring在哪一个包下面使用了注解 -->
<context:component-scan base-package="com.hp"></context:component-scan>
<!-- 读取小配置文件 jdbc.properties -->
<context:property-placeholder location="classpath:jdbc.properties"></context:property-placeholder>
<!-- 配置连接数据库的相关参数 -->
<bean id="dataSource" class="com.mchange.v2.c3p0.ComboPooledDataSource">
<property name="driverClass" value="${jdbc.driver}"></property>
<property name="jdbcUrl" value="${jdbc.url}"></property>
<property name="user" value="${jdbc.username}"></property>
<property name="password" value="${jdbc.password}"></property>
</bean>
<!-- 构建SqlSessionFactory -->
<bean id="sqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean">
<!-- sqlsession离不开数据源 注入数据源 -->
<property name="dataSource" ref="dataSource"></property>
<!-- 给实体类起别名 -->
<property name="typeAliasesPackage">
<value>com.hp.entity</value>
</property>
<!-- 注册mapper -->
<property name="mapperLocations">
<list>
<value>classpath:com/hp/dao/*Mapper.xml</value>
</list>
</property>
</bean>
<!-- 创建dao实现类对象 -->
<bean id="mapperScannerConfigurer" class="org.mybatis.spring.mapper.MapperScannerConfigurer">
<!-- 告知spring给哪一个包下的接口创建实现类对象 -->
<property name="basePackage">
<value>com.hp.dao</value>
</property>
</bean>
<!-- 引入控制事务 -->
<bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
<!-- 控制事务需要数据源 -->
<property name="dataSource" ref="dataSource"></property>
</bean>
<!-- 激活事务的注解 -->
<tx:annotation-driven transaction-manager="transactionManager"></tx:annotation-driven>
</beans>
mvc-servlet.xml
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context" xmlns:tx="http://www.springframework.org/schema/tx"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd http://www.springframework.org/schema/tx http://www.springframework.org/schema/tx/spring-tx.xsd">
<!-- 告知spring在哪一个包下面使用了注解 -->
<context:component-scan base-package="com.hp"></context:component-scan>
<!-- 读取小配置文件 jdbc.properties -->
<context:property-placeholder location="classpath:jdbc.properties"></context:property-placeholder>
<!-- 配置连接数据库的相关参数 -->
<bean id="dataSource" class="com.mchange.v2.c3p0.ComboPooledDataSource">
<property name="driverClass" value="${jdbc.driver}"></property>
<property name="jdbcUrl" value="${jdbc.url}"></property>
<property name="user" value="${jdbc.username}"></property>
<property name="password" value="${jdbc.password}"></property>
</bean>
<!-- 构建SqlSessionFactory -->
<bean id="sqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean">
<!-- sqlsession离不开数据源 注入数据源 -->
<property name="dataSource" ref="dataSource"></property>
<!-- 给实体类起别名 -->
<property name="typeAliasesPackage">
<value>com.hp.entity</value>
</property>
<!-- 注册mapper -->
<property name="mapperLocations">
<list>
<value>classpath:com/hp/dao/*Mapper.xml</value>
</list>
</property>
</bean>
<!-- 创建dao实现类对象 -->
<bean id="mapperScannerConfigurer" class="org.mybatis.spring.mapper.MapperScannerConfigurer">
<!-- 告知spring给哪一个包下的接口创建实现类对象 -->
<property name="basePackage">
<value>com.hp.dao</value>
</property>
</bean>
<!-- 引入控制事务 -->
<bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
<!-- 控制事务需要数据源 -->
<property name="dataSource" ref="dataSource"></property>
</bean>
<!-- 激活事务的注解 -->
<tx:annotation-driven transaction-manager="transactionManager"></tx:annotation-driven>
</beans>
web.xml
<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xmlns.jcp.org/xml/ns/javaee" xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/javaee http://xmlns.jcp.org/xml/ns/javaee/web-app_3_1.xsd" id="WebApp_ID" version="3.1">
<display-name>kaoshiD</display-name>
<!-- 编码格式 -->
<filter>
<filter-name>encode</filter-name>
<filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
<init-param>
<param-name>encoding</param-name>
<param-value>UTF-8</param-value>
</init-param>
</filter>
<filter-mapping>
<filter-name>encode</filter-name>
<url-pattern>/*</url-pattern>
</filter-mapping>
<!-- 启动web容器时,自动装配ApplicationContext.xml的配置信息 -->
<listener>
<listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
</listener>
<!-- 编写springmvc的核心入口 Servlet -->
<servlet>
<servlet-name>mvc</servlet-name>
<servlet-class>org.springframework.web.servlet.DispatcherServlet</servlet-class>
<!-- 初始化 -->
<load-on-startup>0</load-on-startup>
</servlet>
<!-- 客户端所有请求都经过入口Servlet处理 -->
<servlet-mapping>
<servlet-name>mvc</servlet-name>
<url-pattern>/</url-pattern>
</servlet-mapping>
</web-app>
配置文件四个写完之后 写我们的java代码
首先需要四个包 分别是
- com.hp.entity
- com.hp.dao
- com.hp.service
- com.hp.controller
先写 entity 实体类
User类
package com.hp.entity;
import org.springframework.stereotype.Repository;
@Repository
public class User {
private int id;
private String username;
private String password;
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public String getUsername() {
return username;
}
public void setUsername(String username) {
this.username = username;
}
public String getPassword() {
return password;
}
public void setPassword(String password) {
this.password = password;
}
public User(int id, String username, String password) {
super();
this.id = id;
this.username = username;
this.password = password;
}
public User() {
super();
// TODO Auto-generated constructor stub
}
@Override
public String toString() {
return "User [id=" + id + ", username=" + username + ", password="
+ password + "]";
}
}
Emp_new类
package com.hp.entity;
import org.springframework.stereotype.Repository;
@Repository
public class Emp_new {
private String section;
private String count;
private String sumjq;
private String avgjq;
public String getSection() {
return section;
}
public void setSection(String section) {
this.section = section;
}
public String getCount() {
return count;
}
public void setCount(String count) {
this.count = count;
}
public String getSumjq() {
return sumjq;
}
public void setSumjq(String sumjq) {
this.sumjq = sumjq;
}
public String getAvgjq() {
return avgjq;
}
public void setAvgjq(String avgjq) {
this.avgjq = avgjq;
}
@Override
public String toString() {
return "Emp_new [section=" + section + ", count=" + count + ", sumjq="
+ sumjq + ", avgjq=" + avgjq + "]";
}
public Emp_new(String section, String count, String sumjq, String avgjq) {
super();
this.section = section;
this.count = count;
this.sumjq = sumjq;
this.avgjq = avgjq;
}
public Emp_new() {
}
}
再写 dao层
UserDao.java
package com.hp.dao;
import com.hp.entity.User;
public interface UserDao {
//登陆
User login(String username);
}
UserDaoMapper.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="com.hp.dao.UserDao">
<select id="login" resultType="User">
select id,username,password from user where username=#{username}
</select>
</mapper>
Emp_newDao.java
package com.hp.dao;
import java.util.ArrayList;
import com.hp.entity.Emp_new;
public interface Emp_newDao {
//全查
ArrayList<Emp_new> findAll();
}
Emp_newMapper.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="com.hp.dao.Emp_newDao">
<select id="findAll" resultType="Emp_new">
select * from emp_new;
</select>
</mapper>
再写 service 服务层
UserService.java
package com.hp.service;
import com.hp.entity.User;
public interface UserService {
User login(String username);
}
UserServiceImpl.java
package com.hp.service;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import com.hp.dao.UserDao;
import com.hp.entity.User;
@Service
@Transactional //控制事务
public class UserServiceImpl implements UserService {
@Autowired
private UserDao ud;
public User login(String username) {
return ud.login(username);
}
}
Emp_newService.java
package com.hp.service;
import java.util.ArrayList;
import com.hp.entity.Emp_new;
public interface Emp_newService {
ArrayList<Emp_new> findAll();
}
Emp_newServiceImpl.java
package com.hp.service;
import java.util.ArrayList;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import com.hp.dao.Emp_newDao;
import com.hp.entity.Emp_new;
@Service
@Transactional //控制事务
public class Emp_newServiceImpl implements Emp_newService {
@Autowired
private Emp_newDao ed;
public ArrayList<Emp_new> findAll() {
return ed.findAll();
}
}
再写 controller 控制层
UserController.java
package com.hp.controller;
import javax.servlet.http.HttpSession;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import com.hp.entity.User;
import com.hp.service.UserService;
@Controller
@RequestMapping(value="/login")
public class UserController {
@Autowired
private UserService userservice;
//登陆
@RequestMapping("/user")
public String login(String username,String password,HttpSession session){
System.out.println("jsp:"+" "+username+" "+password);
User user = userservice.login(username);
System.out.println(user);
if(user != null){
if(user.getPassword().equals(password)){
session.setAttribute("admin", user);
System.out.println("登陆成功");
return "forward:/emp/findall";
}else{
System.out.println("密码错误");
return "login";
}
}else{
System.out.println("用户不存在");
return "login";
}
}
}
Emp_newController.java
package com.hp.controller;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import com.hp.entity.Emp_new;
import com.hp.service.Emp_newService;
@Controller
@RequestMapping(value="/emp")
public class Emp_newController {
@Autowired
private Emp_newService es;
@RequestMapping("/findall")
public String findAll(Model model){
List<Emp_new> findall = es.findAll();
System.out.println(findall);
model.addAttribute("findall",findall);
return "forward:/findall.jsp";
}
}
最后 书写 前台的页面
新建 login.jsp 在 WEB-INF 目录下
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>登陆界面</title>
</head>
<body>
<form action="/kaoshiD/login/user" method="post">
<h2>用户登录</h2>
用户名:<input type="text" name="username"><br/>
密码:<input type="text" name="password"><br/>
<input type="submit" value="提交">
<a href="/kaoshiB/register.jsp"><input type="button" value="注册"></a>
</form>
</body>
</html>
新建 findall.jsp 同样在 WEB-INF 目录下
<%@page contentType="text/html;charSet=UTF-8" isELIgnored="false"
language="java" pageEncoding="UTF-8"%>
<%@taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%>
<c:set value="${pageContext.request.contextPath}" var="path" />
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>商品全查界面</title>
</head>
<body>
<center>
<h3>展示商品全查</h3>
<table border="1">
<tr>
<td>部门名称</td>
<td>员工总人数</td>
<td>部门总工资</td>
<td>部门平均工资</td>
</tr>
<c:forEach items="${requestScope.findall}" var="emp">
<tr>
<td>${emp.section}</td>
<td>${emp.count}</td>
<td>${emp.sumjq}</td>
<td>${emp.avgjq}</td>
</tr>
</c:forEach>
</table>
</center>
</body>
</html>
最后 进行演示 浏览器访问地址: http://localhost:8080/kaoshiD/login.jsp
自行给 user 表添加用户数据 用于登陆
用户登录
用户名:
密码:
登陆成功后的查询界面
展示员工数据全查
部门名称 | 员工总人数 | 部门总工资 | 部门平均工资 |
---|---|---|---|
SALESMAN | 4 | 7800.0 | 1950.0 |
ANALYST | 1 | 3000.0 | 3000.0 |
CLERK | 2 | 2250.0 | 1125.0 |
MANAGER | 3 | 8275.0 | 2758.3333333333335 |
PRESIDENT | 1 | 5000.0 | 5000.0 |