pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.syllabus</groupId>
    <artifactId>chapter-7</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Additional repository from which the jar dependencies can be downloaded -->
    <repositories>
        <repository>
            <id>1</id>
            <name>MAVEN-CENTRE</name>
            <url>https://repo1.maven.org/maven2/</url>
        </repository>
    </repositories>

    <!-- Project dependencies -->
    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.12</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.48</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.4.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.4</version>
            <!-- Exclude the Hadoop and Scala artifacts pulled in by Spark,
                 so the versions declared in this POM are used instead -->
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-client</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.scala-lang</groupId>
                    <artifactId>scala-library</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Our own Hadoop version -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.0</version>
        </dependency>
    </dependencies>

    <!-- Plugin for compiling the Scala sources -->
    <build>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <configuration>
                            <includes>
                                <include>**/*.scala</include>
                            </includes>
                        </configuration>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
import org.apache.spark.sql.SparkSession

/**
 * @Author: 唐松怀
 * @Date: 2020/3/18 21:49
 */
object nihao {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .master("spark://192.168.0.120:7077")
      .appName("sparkSQL")
      .getOrCreate()

    // Make sure the MySQL JDBC driver is loaded on the driver side
    Class.forName("com.mysql.jdbc.Driver").newInstance()

    // JDBC connection options: URL, driver class, credentials and source table
    val options = Map(
      "url" -> "jdbc:mysql://min01:3306/test",
      "driver" -> "com.mysql.jdbc.Driver",
      "user" -> "root",
      "password" -> "123456",
      "dbtable" -> "score")

    // Read the MySQL table into a DataFrame and print it
    val df6 = spark.read.format("jdbc").options(options).load()
    df6.show()
  }
}
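For comparison, DataFrameReader also exposes a jdbc(url, table, properties) overload that avoids the options map. A minimal sketch reusing the connection details above; the object name JdbcReadSketch is only for illustration:

import java.util.Properties
import org.apache.spark.sql.SparkSession

object JdbcReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .master("spark://192.168.0.120:7077")
      .appName("sparkSQL")
      .getOrCreate()

    // Same credentials and driver class as in the options map above
    val props = new Properties()
    props.setProperty("user", "root")
    props.setProperty("password", "123456")
    props.setProperty("driver", "com.mysql.jdbc.Driver")

    // Arguments: JDBC URL, table name, connection properties
    val df = spark.read.jdbc("jdbc:mysql://min01:3306/test", "score", props)
    df.show()

    spark.stop()
  }
}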
Submitting the job
./bin/spark-submit \
  --jars ./jars/mysql-connector-java-5.1.48.jar \
  --class nihao \
  --master spark://192.168.0.120:7077 \
  --executor-memory 1G \
  --total-executor-cores 3 \
  chapter-7-1.0-SNAPSHOT.jar > temp999.txt
Saving data to MySQL
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SparkSession

/**
 * @Author: 唐松怀
 * @Date: 2020/3/18 21:49
 */
object nihao {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .master("spark://192.168.0.120:7077")
      .appName("sparkSQL")
      .getOrCreate()

    // Build a small DataFrame to be written into MySQL
    val df7_1 = spark.createDataFrame(List(
      ("Alice", "Female", "20"),
      ("Tom", "Male", "25"),
      ("Boris", "Male", "18"))).toDF("name", "sex", "age")

    // JDBC connection credentials
    val properties = new java.util.Properties()
    properties.setProperty("user", "root")
    properties.setProperty("password", "123456")

    // Append the rows to the t_user table
    df7_1.write.mode(SaveMode.Append).jdbc("jdbc:mysql://min01:3306/test", "t_user", properties)
  }
}
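To confirm the rows were written, t_user can be read back with the same connection properties. A minimal sketch, assuming it is placed right after the write in the main method above:

    // Read the freshly written table back and display it (verification only)
    val written = spark.read.jdbc("jdbc:mysql://min01:3306/test", "t_user", properties)
    written.show()

Note that SaveMode.Append inserts the three rows again on every run, while SaveMode.Overwrite would drop and recreate the table.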
MySQL -> DataFrame -> MySQL operation
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SparkSession

/**
 * @Author: 唐松怀
 * @Date: 2020/3/18 21:49
 */
object nihao {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .master("spark://192.168.0.120:7077")
      .appName("sparkSQL")
      .getOrCreate()

    import spark.implicits._

    // JDBC connection options for the source table
    val options = Map(
      "url" -> "jdbc:mysql://min01:3306/test",
      "driver" -> "com.mysql.jdbc.Driver",
      "user" -> "root",
      "password" -> "123456",
      "dbtable" -> "score")

    // Read the source table and rename its columns
    val df6 = spark.read.format("jdbc").options(options).load().toDF("name", "sex", "age")
    df6.count()

    // Keep only the rows matching both filter conditions
    val df666 = df6.filter($"name" > 1).filter($"age" < 100)

    // JDBC connection credentials for the target table
    val properties = new java.util.Properties()
    properties.setProperty("user", "root")
    properties.setProperty("password", "123456")

    // Append the filtered rows to t_user02
    df666.write.mode(SaveMode.Append).jdbc("jdbc:mysql://min01:3306/test", "t_user02", properties)
  }
}
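The same filter can also be expressed as SQL by registering the DataFrame as a temporary view. A minimal sketch, assuming it replaces the df666 lines above; the view name score_view is arbitrary:

    // Register the DataFrame as a temporary view so it can be queried with SQL
    df6.createOrReplaceTempView("score_view")

    // Same conditions as the two filter calls above, written as a SQL query
    val filtered = spark.sql("SELECT name, sex, age FROM score_view WHERE name > 1 AND age < 100")

    // Append the filtered rows to t_user02, exactly as with df666
    filtered.write.mode(SaveMode.Append).jdbc("jdbc:mysql://min01:3306/test", "t_user02", properties)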