Spring Batch 批处理框架 埃森哲和Spring Source研发
主要解决批处理数据的问题,包含并行处理,事务处理机制等。具有健壮性 可扩展,和自带的监控功能,并且支持断点和重发。让程序员更加注重于业务实现。
Spring Batch 结构如下
JobRepository :作业仓库 负责job。step执行过程的状态保存
JobLauncher : 作业调度器 提供执行job入口
Job : 作业 由一个或者多个step组成 封装多个批处理操作,每个step可以有自己的上下文存放变量和自己的生命周期。
Step: 作业步 job的一个环节,有多个或者一个step组成job
Tasklet: Step中的具体执行逻辑的动作 可循环执行,支持异步和同步 适用于不同场景
Chunk :给定的item集合 可以定义对chunk的读操作,处理操作,写操作,提交间隔等 这是SpringBatch 一个特性
Item:一条数据记录
ItemReader: 从数据源(文件系统 队列 文件等)读取item
ItemProcessor:在Item写入前 进行一些处理 比如数据清洗,数据转换,数据校验,数据过滤等。
ItemWrieter :将item批量输出到数据源(文件系统,队列,数据库等)
看下batch的基本配置元素 分为两种 一种为在内存中。
<?xml version="1.0" encoding="UTF-8"?> <beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p" xmlns:tx="http://www.springframework.org/schema/tx" xmlns:aop="http://www.springframework.org/schema/aop" xmlns:context="http://www.springframework.org/schema/context" xsi:schemaLocation="http://www.springframework.org/schema/beans default-autowire="byName"> <!--job工厂 不是定时--> <bean id="jobRepository" class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean"> </bean>
<!--job加载器-->
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher"> <property name="jobRepository" ref="jobRepository"/> </bean>
<!--事务管理器-->
<bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"/> </beans>
第二种为放在数据库中 需要在数据库中建表 脚本在spring-batch-core的core包里面。mysql版本如下 下一篇文章解释表的作用
-- Autogenerated: do not edit this file CREATE TABLE BATCH_JOB_INSTANCE ( JOB_INSTANCE_ID BIGINT NOT NULL PRIMARY KEY , VERSION BIGINT , JOB_NAME VARCHAR(100) NOT NULL, JOB_KEY VARCHAR(32) NOT NULL, constraint JOB_INST_UN unique (JOB_NAME, JOB_KEY) ) ENGINE=InnoDB; CREATE TABLE BATCH_JOB_EXECUTION ( JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , VERSION BIGINT , JOB_INSTANCE_ID BIGINT NOT NULL, CREATE_TIME DATETIME NOT NULL, START_TIME DATETIME DEFAULT NULL , END_TIME DATETIME DEFAULT NULL , STATUS VARCHAR(10) , EXIT_CODE VARCHAR(100) , EXIT_MESSAGE VARCHAR(2500) , LAST_UPDATED DATETIME, constraint JOB_INST_EXEC_FK foreign key (JOB_INSTANCE_ID) references BATCH_JOB_INSTANCE(JOB_INSTANCE_ID) ) ENGINE=InnoDB; CREATE TABLE BATCH_JOB_EXECUTION_PARAMS ( JOB_EXECUTION_ID BIGINT NOT NULL , TYPE_CD VARCHAR(6) NOT NULL , KEY_NAME VARCHAR(100) NOT NULL , STRING_VAL VARCHAR(250) , DATE_VAL DATETIME DEFAULT NULL , LONG_VAL BIGINT , DOUBLE_VAL DOUBLE PRECISION , IDENTIFYING CHAR(1) NOT NULL , constraint JOB_EXEC_PARAMS_FK foreign key (JOB_EXECUTION_ID) references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) ) ENGINE=InnoDB; CREATE TABLE BATCH_STEP_EXECUTION ( STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , VERSION BIGINT NOT NULL, STEP_NAME VARCHAR(100) NOT NULL, JOB_EXECUTION_ID BIGINT NOT NULL, START_TIME DATETIME NOT NULL , END_TIME DATETIME DEFAULT NULL , STATUS VARCHAR(10) , COMMIT_COUNT BIGINT , READ_COUNT BIGINT , FILTER_COUNT BIGINT , WRITE_COUNT BIGINT , READ_SKIP_COUNT BIGINT , WRITE_SKIP_COUNT BIGINT , PROCESS_SKIP_COUNT BIGINT , ROLLBACK_COUNT BIGINT , EXIT_CODE VARCHAR(100) , EXIT_MESSAGE VARCHAR(2500) , LAST_UPDATED DATETIME, constraint JOB_EXEC_STEP_FK foreign key (JOB_EXECUTION_ID) references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) ) ENGINE=InnoDB; CREATE TABLE BATCH_STEP_EXECUTION_CONTEXT ( STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, SHORT_CONTEXT VARCHAR(2500) NOT NULL, SERIALIZED_CONTEXT TEXT , constraint STEP_EXEC_CTX_FK foreign key (STEP_EXECUTION_ID) references BATCH_STEP_EXECUTION(STEP_EXECUTION_ID) ) ENGINE=InnoDB; CREATE TABLE BATCH_JOB_EXECUTION_CONTEXT ( JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, SHORT_CONTEXT VARCHAR(2500) NOT NULL, SERIALIZED_CONTEXT TEXT , constraint JOB_EXEC_CTX_FK foreign key (JOB_EXECUTION_ID) references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) ) ENGINE=InnoDB; CREATE TABLE BATCH_STEP_EXECUTION_SEQ (ID BIGINT NOT NULL) ENGINE=MYISAM; INSERT INTO BATCH_STEP_EXECUTION_SEQ values(0); CREATE TABLE BATCH_JOB_EXECUTION_SEQ (ID BIGINT NOT NULL) ENGINE=MYISAM; INSERT INTO BATCH_JOB_EXECUTION_SEQ values(0); CREATE TABLE BATCH_JOB_SEQ (ID BIGINT NOT NULL) ENGINE=MYISAM; INSERT INTO BATCH_JOB_SEQ values(0);
<?xml version="1.0" encoding="UTF-8"?> <bean:beans xmlns="http://www.springframework.org/schema/batch" //这里batch的xsd xmlns:bean="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p" xmlns:tx="http://www.springframework.org/schema/tx" xmlns:aop="http://www.springframework.org/schema/aop" xmlns:context="http://www.springframework.org/schema/context" xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd http://www.springframework.org/schema/tx http://www.springframework.org/schema/tx/spring-tx-3.0.xsd http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-3.0.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-2.5.xsd http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-2.2.xsd"> <!-- 作业仓库 --> <job-repository id="jobRepository" data-source="dataSource" transaction-manager="transactionManager" isolation-level-for-create="SERIALIZABLE" table-prefix="BATCH_" max-varchar-length="1000" /> <!-- 作业调度器 --> <bean:bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher"> <bean:property name="jobRepository" ref="jobRepository"/> </bean:bean> <!-- 事务管理器 --> <bean:bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager"> <bean:property name="dataSource" ref="dataSource" /> </bean:bean> <!-- 数据源 可以引入properties 做成可配--> <bean:bean id="dataSource" class="org.springframework.jdbc.datasource.DriverManagerDataSource"> <bean:property name="driverClassName"> <bean:value>com.mysql.jdbc.Driver</bean:value> </bean:property> <bean:property name="url"> <bean:value>jdbc:mysql://127.0.0.1:3306/batch</bean:value> </bean:property> <bean:property name="username" value="root"></bean:property> <bean:property name="password" value="root"></bean:property> </bean:bean> </bean:beans>
这里拿spring batch一书上面例子说明应用场景:
如图 这种情况 下面简单的实现:
1.定义信用卡实体
/** * */ package com.juxtapose.example.ch02; /** * 信用卡对账单模型.<br> * @author bruce.liu(mailto:jxta.liu@gmail.com) * 2013-1-6下午09:56:02 */ public class CreditBill { private String accountID = ""; /** 银行卡账户ID */ private String name = ""; /** 持卡人姓名 */ private double amount = 0; /** 消费金额 */ private String date; /** 消费日期 ,格式YYYY-MM-DD HH:MM:SS*/ private String address; /** 消费场所 **/ public String getAccountID() { return accountID; } public void setAccountID(String accountID) { this.accountID = accountID; } public String getName() { return name; } public void setName(String name) { this.name = name; } public double getAmount() { return amount; } public void setAmount(double amount) { this.amount = amount; } public String getDate() { return date; } public void setDate(String date) { this.date = date; } public String getAddress() { return address; } public void setAddress(String address) { this.address = address; } /** * */ public String toString(){ StringBuffer sb = new StringBuffer(); sb.append("accountID=" + getAccountID() + ";name=" + getName() + ";amount=" + getAmount() + ";date=" + getDate() + ";address=" + getAddress()); return sb.toString(); } }
2. 定义processor 这里只是打印
import org.springframework.batch.item.ItemProcessor; public class CreditBillProcessor implements ItemProcessor<CreditBill, CreditBill> { public CreditBill process(CreditBill bill) throws Exception { System.out.println(bill.toString()); return bill; } }
第三 :
<?xml version="1.0" encoding="UTF-8"?> <bean:beans xmlns="http://www.springframework.org/schema/batch" xmlns:bean="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p" xmlns:tx="http://www.springframework.org/schema/tx" xmlns:aop="http://www.springframework.org/schema/aop" xmlns:context="http://www.springframework.org/schema/context" xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd http://www.springframework.org/schema/tx http://www.springframework.org/schema/tx/spring-tx-3.0.xsd http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-3.0.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-2.5.xsd http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-2.2.xsd"> <bean:import resource="classpath:ch02/job-context.xml"/> <job id="billJob"> <step id="billStep"> <tasklet transaction-manager="transactionManager"> <chunk reader="csvItemReader" writer="csvItemWriter" processor="creditBillProcessor" commit-interval="2"> //每两行插入一次 </chunk> </tasklet> </step> </job> <job id="testJob"> <step id="testStep"> <tasklet transaction-manager="transactionManager"> <chunk reader="csvItemReader" writer="csvItemWriter" processor="creditBillProcessor" commit-interval="2"> </chunk> </tasklet> </step> </job> <!-- 读取信用卡账单文件,CSV格式 --> <bean:bean id="csvItemReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="step"> <bean:property name="resource" value="classpath:ch02/data/credit-card-bill-201303.csv"/> //读取的文件名称 <bean:property name="lineMapper"> <bean:bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper"> <bean:property name="lineTokenizer" ref="lineTokenizer"/> <bean:property name="fieldSetMapper">//映射列 <bean:bean class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper"> <bean:property name="prototypeBeanName" value="creditBill"> </bean:property> </bean:bean> </bean:property> </bean:bean> </bean:property> </bean:bean> <!-- lineTokenizer --> <bean:bean id="lineTokenizer" class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer"> <bean:property name="delimiter" value=","/> //分隔符 <bean:property name="names"> <bean:list> //列名 <bean:value>accountID</bean:value> <bean:value>name</bean:value> <bean:value>amount</bean:value> <bean:value>date</bean:value> <bean:value>address</bean:value> </bean:list> </bean:property> </bean:bean> <!-- 写信用卡账单文件,CSV格式 --> <bean:bean id="csvItemWriter" class="org.springframework.batch.item.file.FlatFileItemWriter" scope="step"> <bean:property name="resource" value="file:target/ch02/outputFile.csv"/> <bean:property name="lineAggregator"> <bean:bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator"> <bean:property name="delimiter" value=","></bean:property> <bean:property name="fieldExtractor"> <bean:bean class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor"> <bean:property name="names" value="accountID,name,amount,date,address"> </bean:property> </bean:bean> </bean:property> </bean:bean> </bean:property> </bean:bean> <bean:bean id="creditBill" scope="prototype" class="com.juxtapose.example.ch02.CreditBill"> </bean:bean> <bean:bean id="creditBillProcessor" scope="step" class="com.juxtapose.example.ch02.CreditBillProcessor"> </bean:bean> </bean:beans>
4.测试
/** * */ package test.com.juxtapose.example.ch02; import java.util.LinkedHashMap; import java.util.Map; import org.springframework.batch.core.Job; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameter; import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.launch.JobLauncher; import org.springframework.context.ApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext; /** * * @author bruce.liu(mailto:jxta.liu@gmail.com) * 2013-2-28下午08:34:48 */ public class JobLaunch { /** * @param args */ public static void main(String[] args) { ApplicationContext context = new ClassPathXmlApplicationContext( "ch02/job/job.xml"); JobLauncher launcher = (JobLauncher) context.getBean("jobLauncher"); Job job = (Job) context.getBean("billJob"); try {
//这里只是测试参数 Map<String,JobParameter> map = new LinkedHashMap<String,JobParameter>(); JobParameter jb = new JobParameter("aa",true); map.put("aa", jb); JobParameters jbs = new JobParameters(map); JobExecution result = launcher.run(job, jbs); System.out.println(result.toString()); } catch (Exception e) { e.printStackTrace(); } } }
结果如上。 大功告成。
下篇将表结构和各个组件的实现和扩展。
20170818 5.00