1.解压缩
tar -zxvf /usr/tools/sqoop-1.99.7-bin-hadoop200.tar.gz
mv sqoop-1.99.7-bin-hadoop200.tar.gz sqoop-1.99.7
2.环境变量
vim /etc/profile
export SQOOP_HOME=/usr/local/sqoop-1.99.7
export PATH=$PATH:$SQOOP_HOME/bin
source /etc/profile
3.sqoop配置
vim /sqoop-1.99.7/conf/sqoop.properties
org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/home/hadoop-3.2.0/etc/hadoop
org.apache.sqoop.repository.jdbc.handler=org.apache.sqoop.repository.mysql.MySqlRepositoryHandler
org.apache.sqoop.repository.jdbc.transaction.isolation=READ_COMMITTED
org.apache.sqoop.repository.jdbc.maximum.connections=10
org.apache.sqoop.repository.jdbc.url=jdbc:mysql://node1:3306/ecs?createDatabaseIfNotExist=true
org.apache.sqoop.repository.jdbc.driver=com.mysql.jdbc.Driver
org.apache.sqoop.repository.jdbc.user=root
org.apache.sqoop.repository.jdbc.password=root
org.apache.sqoop.repository.sysprop.mysql.stream.error.file=@LOGDIR@/mysqlrepo.log
org.apache.sqoop.jetty.port=12000
#配置第三方jar包位置
mkdir /usr/local/sqoop-1.99.7/lib
org.apache.sqoop.connector.external.loadpath=/usr/local/sqoop-1.99.7/lib
也可直接将jar放到系统的lib目录下
cp /usr/tools/mysql-connector-java-8.0.13.jar /lib
4.sqoop初始化
sqoop2-tool upgrade
5.sqoop验证,验证是否正确
sqoop2-tool verify
此步骤后,mysql数据库中会创建数据库名为SQOOP的数据库,用以存储sqoop的metadata
异常处理
在执行sqoop2-tool verify时报错
2019-05-24T12:15:45,449 ERROR [main] org.apache.sqoop.repository.common.CommonRepositoryHandler - Can't execute query: CREATE DATABASE IF NOT EXISTS"SQOOP"
在mysql中执行
set global sql_mode ='ANSI_QUOTES';
或者在mysql的配置文档my-default.cnf中添加
sql_mode=ANSI_QUOTES
6.启动sqoop2服务
sqoop2-server start
sqoop2-server stop
7.sqoop客户端安装
scp -r sqoop-1.99.7/ node2:$PWD
vim /etc/profile
export SQOOP_HOME=/usr/local/sqoop-1.99.7
export PATH=$PATH:$SQOOP_HOME/bin
source /etc/profile
8.运行客户端
sqoop2-shell
9.设置显示错误信息
sqoop:000> set option --name verbose --value true
连接sqoop服务端
sqoop:000> set server --host node1
sqoop:000> set option --name verbose --value true
Verbose option was changed to true
sqoop:000> set server --host node1
Server is set successfully
验证是否已连接上
sqoop:000> show version -all
client version:
Sqoop 1.99.7 source revision 435d5e61b922a32d7bce567fe5fb1a9c0d9b1bbb
Compiled by abefine on Tue Jul 19 16:08:27 PDT 2016
server version:
Sqoop 1.99.7 source revision 435d5e61b922a32d7bce567fe5fb1a9c0d9b1bbb
Compiled by abefine on Tue Jul 19 16:08:27 PDT 2016
API versions:
[v1]
创建link对象
1.检查sqoop服务已经注册的connectors
sqoop:000> show connector
+------------------------+---------+------------------------------------------------------------+----------------------+
| Name | Version | Class | Supported Directions |
+------------------------+---------+------------------------------------------------------------+----------------------+
| generic-jdbc-connector | 1.99.7 | org.apache.sqoop.connector.jdbc.GenericJdbcConnector | FROM/TO |
| kite-connector | 1.99.7 | org.apache.sqoop.connector.kite.KiteConnector | FROM/TO |
| oracle-jdbc-connector | 1.99.7 | org.apache.sqoop.connector.jdbc.oracle.OracleJdbcConnector | FROM/TO |
| ftp-connector | 1.99.7 | org.apache.sqoop.connector.ftp.FtpConnector | TO |
| hdfs-connector | 1.99.7 | org.apache.sqoop.connector.hdfs.HdfsConnector | FROM/TO |
| kafka-connector | 1.99.7 | org.apache.sqoop.connector.kafka.KafkaConnector | TO |
| sftp-connector | 1.99.7 | org.apache.sqoop.connector.sftp.SftpConnector | TO |
+------------------------+---------+------------------------------------------------------------+----------------------+
sqoop:000> create link -c generic-jdbc-connector
Creating link for connector with name generic-jdbc-connector
Please fill following values to create new link object
Name: mysql-link
Database connection
Driver class: com.mysql.jdbc.Driver
Connection String: jdbc:mysql://node1:3306/ecs
Username: root
Password: ****
Fetch Size:
Connection Properties:
There are currently 0 values in the map:
entry#
SQL Dialect
Identifier enclose: 备注:定界符(输入的空格)
New link was successfully created with validation status OK and name mysql-link
sqoop:000> show link
+------------+------------------------+---------+
| Name | Connector Name | Enabled |
+------------+------------------------+---------+
| mysql-link | generic-jdbc-connector | true |
+------------+------------------------+---------+
sqoop:000> show link -a
1 link(s) to show:
link with name mysql-link (Enabled: true, Created by root at 5/24/19 1:22 PM, Updated by root at 5/24/19 1:22 PM)
Using Connector generic-jdbc-connector with name {1}
Database connection
Driver class: com.mysql.jdbc.Driver
Connection String: jdbc:mysql://node1:3306/ecs
Username: root
Password:
Fetch Size:
Connection Properties:
SQL Dialect
Identifier enclose:
sqoop:000> create link -c hdfs-connector
Creating link for connector with name hdfs-connector
Please fill following values to create new link object
Name: hdfs-link
HDFS cluster
URI: hdfs://node1:9820
Conf directory: /home/hadoop-3.2.0/etc/hadoop
Additional configs::
There are currently 0 values in the map:
entry#
New link was successfully created with validation status OK and name hdfs-link
sqoop:000>
HDFS 导出到mysql
sqoop:000> create job -f "hdfs-link" -t "mysql-link"
Creating job for links with from name hdfs-link and to name mysql-link
Please fill following values to create new job object
Name: hdfs_mysql_job
Input configuration
Input directory: /user/data
Override null value:
Null value:
Incremental import
Incremental type:
0 : NONE
1 : NEW_FILES
Choose: 0
Last imported date:
Database target
Schema name: ecs
Table name: users
Column names:
There are currently 0 values in the list:
element#
Staging table:
Clear stage table:
Throttling resources
Extractors:
Loaders:
Classpath configuration
Extra mapper jars:
There are currently 0 values in the list:
element#
New job was successfully created with validation status OK and name hdfs_mysql_job
sqoop:000> show job
+----+----------------+----------------------------+-------------------------------------+---------+
| Id | Name | From Connector | To Connector | Enabled |
+----+----------------+----------------------------+-------------------------------------+---------+
| 1 | hdfs_mysql_job | hdfs-link (hdfs-connector) | mysql-link (generic-jdbc-connector) | true |
+----+----------------+----------------------------+-------------------------------------+---------+
修改job
sqoop:000> update job -n hdfs_mysql_job
启动job
sqoop:000> start job -n hdfs_mysql_job
Submission details
Job Name: hdfs_mysql_job
Server URL: http://node1:12000/sqoop/
Created by: root
Creation date: 2019-05-24 14:27:45 HKT
Lastly updated by: root
External ID: job_1558650816925_0003
http://node1:8088/proxy/application_1558650816925_0003/
2019-05-24 14:27:45 HKT: BOOTING - Progress is not available
查看job状态
sqoop:000> status job -n hdfs_mysql_job
Submission details
Job Name: hdfs_mysql_job
Server URL: http://node1:12000/sqoop/
Created by: root
Creation date: 2019-05-24 14:27:45 HKT
Lastly updated by: root
External ID: job_1558650816925_0003
http://node1:8088/proxy/application_1558650816925_0003/
2019-05-24 14:30:10 HKT: BOOTING - 0.00 %
启动job,并显示执行状态
sqoop:000> start job -n hdfs_mysql_job -s
停止job
sqoop:000> stop job -n hdfs_mysql_job
mysql导入到hadoop
sqoop:000> create job -f "mysql-link" -t "hdfs-link"
Name: mysql_hdfs_job
Database source
Schema name: ecs
Table name: role
SQL statement:
Column names:
There are currently 0 values in the list:
element#
Partition column:
Partition column nullable:
Boundary query:
Incremental read
Check column:
Last value:
Target configuration
Override null value:
Null value:
File format:
0 : TEXT_FILE
1 : SEQUENCE_FILE
2 : PARQUET_FILE
Choose: 0
Compression codec:
0 : NONE
1 : DEFAULT
2 : DEFLATE
3 : GZIP
4 : BZIP2
5 : LZO
6 : LZ4
7 : SNAPPY
8 : CUSTOM
Choose: 0
Custom codec:
Error message: Can't be null nor empty
Output directory: /user/data
Append mode:
Throttling resources
Extractors:
Loaders:
Classpath configuration
Extra mapper jars:
There are currently 0 values in the list:
element#
New job was successfully created with validation status OK and name mysql_hdfs_job
出现错误时,开启显示错误详细信息
set option --name verbose --value true
启动job时报错
Exception: org.apache.sqoop.common.SqoopException Message: GENERIC_JDBC_CONNECTOR_0003:Unable to access meta data -
查看错误信息,使用ecs账户时错误信息
Caused by: Exception: java.sql.SQLSyntaxErrorException Message: Unknown table 'role' in information_schema
使用root账户时,错误信息是:
Caused by: Exception: java.sql.SQLSyntaxErrorException Message: Table 'SQOOP.role' doesn't exist
看起来是识别不到设置的数据ecs,默认取的第一个数据库
使用sqoop2从mysql导入到hdfs的测试失败
sqoop2-shell报错
[root@node1 ~]# sqoop2-shell
Setting conf dir: /usr/local/sqoop-1.99.7/bin/../conf
Sqoop home directory: /usr/local/sqoop-1.99.7
Exception in thread "main" java.lang.ExceptionInInitializerError
at org.codehaus.groovy.runtime.InvokerHelper.<clinit>(InvokerHelper.java:66)
at org.codehaus.groovy.runtime.callsite.CallSiteArray.createCallConstructorSite(CallSiteArray.java:87)
at org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCallConstructor(CallSiteArray.java:60)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callConstructor(AbstractCallSite.java:235)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callConstructor(AbstractCallSite.java:247)
at org.codehaus.groovy.tools.shell.Groovysh.<clinit>(Groovysh.groovy:54)
at org.apache.sqoop.shell.SqoopShell.main(SqoopShell.java:75)
Caused by: groovy.lang.GroovyRuntimeException: Conflicting module versions. Module [groovy-all is loaded in version 2.4.11 and you are trying to load version 2.4.0
解决方法:找出groovy.jar删除
[root@node1 sqoop-1.99.7]# find . -name 'groo*'
[root@node1 sqoop-1.99.7]# rm -r ./shell/lib/groovy-all-2.4.0.jar
rm: remove regular file ‘./shell/lib/groovy-all-2.4.0.jar’? y