1. build.sbt 内容:
// Project coordinates for the MongoDB-on-Spark experiment.
name := "data_ana"

version := "1.0"

// %% below appends the Scala binary version derived from this setting to the artifact name.
scalaVersion := "2.11.8"

// The two Spark modules are pinned to the same release.
val sparkVersion = "2.2.0"

libraryDependencies ++= Seq(
  // Spark runtime and the SQL / Dataset API
  "org.apache.spark" %% "spark-core" % sparkVersion,
  "org.apache.spark" %% "spark-sql" % sparkVersion,
  // Hadoop client libraries
  "org.apache.hadoop" % "hadoop-client" % "2.7.3",
  // MongoDB Java driver and the Hadoop / Spark connectors built on it
  "org.mongodb" % "mongo-java-driver" % "3.4.2",
  "org.mongodb.mongo-hadoop" % "mongo-hadoop-core" % "2.0.2",
  "org.mongodb.spark" %% "mongo-spark-connector" % "2.2.0"
)
2.简单的测试代码
import com.mongodb.spark._
import org.apache.spark.sql.SparkSession
/**
 * Minimal smoke test for the MongoDB Spark connector.
 *
 * Builds a local SparkSession configured with MongoDB input/output URIs,
 * loads the configured collection through `MongoSpark.load`, and prints
 * its schema and a sample of its rows to stdout.
 */
object mongdb_test {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Input and output are configured with the same connection string,
    // so it is kept in one place to stop the two settings drifting apart.
    val mongoUri = "mongodb://name:passwd@192.168.1.11:27017/database_name.table_name"

    val spark = SparkSession
      .builder()
      .master("local")
      .appName("lining")
      .config("spark.mongodb.input.uri", mongoUri)
      .config("spark.mongodb.output.uri", mongoUri)
      .getOrCreate()

    try {
      val lines = MongoSpark.load(spark)
      lines.printSchema()
      lines.show()
    } finally {
      // Always release the Spark context, even if loading or printing fails.
      spark.stop()
    }
  }
}
3.相关的链接
https://docs.mongodb.com/spark-connector/current/scala/datasets-and-sql/
http://spark.apache.org/docs/2.2.0/sql-programming-guide.html
http://www.thebigdata.cn/MongoDB/31116.html