上篇文章介绍了推荐引擎算法在spark-shell中的操作,实际环境中我们不会仅仅运行一次,
更多的是一次编译多次运行,今天我们开始实验二,不过上次实验的笔录很有用哦。
--------------------------------------------------------------------------------------------------------------------------------------------------------------
一,处理数据。
def PrepareData(): (RDD[Rating], Map[Int, String]) = {
val sc = new SparkContext(new SparkConf().setAppName("Recommend").setMaster("local"))
print("开始读取用户评分数据中...")
val rawUserData = sc.textFile("file:/E:/ml-100k/u.data")
val rawRatings = rawUserData.map(_.split(" ").take(3))
val ratingsRDD = rawRatings.map{ case Array(user, movie, rating) => Rating(user.toInt, movie.toInt, rating.toDouble) }
println("共计: " + ratingsRDD.count.toString() + "条 ratings")
print("开始读取电影数据中...")
val itemRDD = sc.textFile("/E:/ml-100k/u.item")
val movieTitle = itemRDD.map(line => line.split("\|").take(2)).map(array => (array(0).toInt,array(1))).collect().toMap
val numRatings = ratingsRDD.count()
val numUsers = ratingsRDD.map(_.user).distinct().count()
val numMovies = ratingsRDD.map(_.product).distinct().count()
println("共计: ratings:" + numRatings + " User " + numUsers + " Movie " + numMovies)
return (ratingsRDD,movieTitle)
}
二,针对用户id推荐电影。
def RecommendMovies(model:MatrixFactorizationModel,movieTitle:Map[Int,String],inputUserID:Int) = {
val RecommendMovie = model.recommendProducts(inputUserID, 10)
var i = 1
println("针对用户id" + inputUserID + "推荐下列电影:")
RecommendMovie.foreach{r =>
println(i.toString() + "." + movieTitle(r.product) + "评分: " + r.rating.toString())
i += 1
}
}
三,针对电影推荐用户。
def RecommendUsers(model:MatrixFactorizationModel,movieTitle:Map[Int,String],inputMovieID:Int) = {
val RecommendUser = model.recommendUsers(inputMovieID, 10)
var i = 1
println("针对电影 id" + inputMovieID + "电影名: " + movieTitle(inputMovieID.toInt) + "推荐下列用户id:" )
RecommendUser.foreach{r =>
println(i.toString + "用户id:" + r.user + "评分:" + r.rating)
i = i + 1
}
}
四,去除不必要的log。
def SetLogger {
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("com").setLevel(Level.OFF)
System.setProperty("spark.ui.showConsoleProgress","false")
Logger.getRootLogger().setLevel(Level.OFF);
}
五,对使用者相对友好的指令。
def recommend(model:MatrixFactorizationModel,movieTitle:Map[Int,String]) = {
var choose = ""
while (choose != "3") {
print("请选择要推荐的类型 1.针对用户推荐电影 2.针对电影推荐感兴趣的用户 3.离开?")
choose = readLine()
if (choose == "1") {
print("请输入用户id?")
val inputUserID = readLine()
RecommendMovies(model,movieTitle,inputUserID.toInt)
} else if (choose == "2") {
print("请输入电影的id?")
val inputMovieID = readLine()
RecommendUsers(model,movieTitle,inputMovieID.toInt)
}
}
}
六,主函数。
def main(args:Array[String]) {
val (ratings,movieTitle) = PrepareData()
val model = ALS.train(ratings,5,20,0.1)
recommend(model,movieTitle)
}
编译成功后在windows下运行,需要下载https://github.com/srccodes/hadoop-common-2.2.0-bin,
然后添加系统变量HADOOP_HOME,值是上面那个文件的解压地点,Path中添加$HADOOP_HOMEin
实际执行