推荐系统: 数据 + 挖掘/训练 ---> 模型/用户的兴趣爱好等特征---> 给用户做推荐
总结:
$A_{m \times k} \times B_{k \times n} = C_{m \times n}$ ——至于乘法的规则,是数学问题,知道可以乘即可,不需要我们自己计算
反过来
$C_{m \times n} = A_{m \times k} \times B_{k \times n}$ ——至于矩阵如何拆分/如何分解,是数学问题,知道可以拆/可以分解即可
使用 Spark MLlib 中提供的基于隐语义模型的协同过滤算法——ALS 算法就可以实现该需求!
代码:
package cn.itcast.edu.model

import org.apache.spark.SparkContext
import org.apache.spark.ml.recommendation.{ALS, ALSModel}
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}

/**
 * Author itcast
 * Desc Trains an ALS collaborative-filtering model on (userId, movieId, score)
 * triples read from `data/input/u.data` and prints four kinds of top-5
 * recommendations: for all users, for all movies, for one given user and
 * for one given movie.
 */
object ALSMovieDemoTest {

  /**
   * Entry point: builds a local SparkSession, loads the ratings, fits the ALS
   * model and shows the recommendations on the console.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // TODO 0. Prepare the environment
    val spark: SparkSession = SparkSession.builder()
      .appName("BatchAnalysis")
      .master("local[*]")
      // Keep the shuffle partition count small for this local test; in real
      // deployments tune it to the cluster size (the default is 200).
      .config("spark.sql.shuffle.partitions", "4")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("WARN")
    import spark.implicits._

    try {
      // TODO 1. Load and parse the data
      val fileDS: Dataset[String] = spark.read.textFile("data/input/u.data")
      val ratingDF: DataFrame = fileDS
        .filter(line => line.trim.nonEmpty) // skip blank lines instead of failing on them
        .map { line =>
          // Split on any run of whitespace so that both tab-separated and
          // space-separated rating files are parsed; splitting on a single
          // space leaves a tab-separated line as one field and arr(1) throws.
          val arr: Array[String] = line.trim.split("\\s+")
          (arr(0).toInt, arr(1).toInt, arr(2).toDouble)
        }
        .toDF("userId", "movieId", "score")

      // Split 8:2 into training and test sets (no seed is set, so the split
      // differs from run to run).
      val Array(trainSet, testSet) = ratingDF.randomSplit(Array(0.8, 0.2))

      // TODO 2. Build and train the ALS recommendation model
      val als: ALS = new ALS()
        .setUserCol("userId")  // column holding the user id
        .setItemCol("movieId") // column holding the item (movie) id
        .setRatingCol("score") // column holding the rating
        .setRank(10)           // number of latent factors: the k in C(m x n) = A(m x k) x B(k x n)
        .setMaxIter(10)        // maximum number of iterations
        // alpha is the confidence parameter of the implicit-feedback variant of
        // ALS, not an iteration step size; it is only used when implicitPrefs is
        // true, so with the default explicit-feedback setting it has no effect.
        .setAlpha(1.0)

      // Train the model on the training set
      val model: ALSModel = als.fit(trainSet)

      // Test the model on the test set
      //val testResult: DataFrame = model.recommendForUserSubset(testSet,5)
      // Compute the model error -- model evaluation
      //......

      // TODO 3. Make recommendations
      // Top 5 movies for every user
      val result1: DataFrame = model.recommendForAllUsers(5)
      // Top 5 users for every movie
      val result2: DataFrame = model.recommendForAllItems(5)
      // Top 5 movies for the given user
      val result3: DataFrame = model.recommendForUserSubset(Seq(196).toDF("userId"), 5)
      // Top 5 users for the given movie
      val result4: DataFrame = model.recommendForItemSubset(Seq(242).toDF("movieId"), 5)

      result1.show(false)
      result2.show(false)
      result3.show(false)
      result4.show(false)
    } finally {
      // Release the local Spark resources even if loading or training fails.
      spark.stop()
    }
  }
}
结果:
+------+--------------------------------------------------------------------------------------------+ |userId|recommendations | +------+--------------------------------------------------------------------------------------------+ |12 |[[113, 5.381579], [963, 5.31346], [1449, 5.2961473], [1169, 5.294356], [1642, 5.0616713]] | |13 |[[1473, 5.372843], [718, 5.01372], [814, 4.877114], [851, 4.840162], [867, 4.826006]] | |14 |[[1367, 5.183822], [1463, 5.0972223], [169, 5.092076], [1500, 5.024627], [1056, 5.0170174]] | |18 |[[1449, 5.018735], [1643, 4.864253], [1642, 4.8314786], [1367, 4.645226], [1302, 4.5830455]]| |25 |[[1449, 4.9288864], [1169, 4.832283], [963, 4.8168635], [113, 4.800234], [169, 4.764591]] | |37 |[[1169, 4.685104], [613, 4.512385], [12, 4.493502], [1467, 4.4650197], [96, 4.450862]] | |38 |[[394, 7.451351], [113, 5.693639], [989, 5.4949994], [888, 5.4821043], [143, 5.3772283]] | |46 |[[1449, 5.5280976], [1642, 5.391047], [958, 5.1321054], [318, 5.105298], [850, 5.097537]] | |50 |[[1368, 5.792747], [320, 5.706074], [42, 5.2726355], [853, 5.212684], [913, 5.138864]] | |52 |[[1449, 5.688901], [1642, 5.3448086], [1367, 5.3399267], [963, 5.2113814], [408, 5.1831136]]| |56 |[[1169, 4.6193953], [1450, 4.565966], [64, 4.51001], [22, 4.4720573], [613, 4.4647655]] | |65 |[[1643, 4.9899325], [318, 4.8156314], [1449, 4.788787], [1169, 4.783852], [1473, 4.678318]] | |67 |[[1169, 5.43927], [1240, 5.197868], [1664, 5.165731], [899, 5.154517], [1612, 5.15304]] | |70 |[[1449, 4.5494556], [1463, 4.408498], [1643, 4.351773], [1169, 4.3509493], [64, 4.3365135]] | |73 |[[1405, 5.040349], [1449, 4.7896886], [1589, 4.70632], [1463, 4.6175776], [1367, 4.6101847]]| |83 |[[22, 4.5427837], [1450, 4.5352006], [1169, 4.477106], [1189, 4.468127], [1167, 4.446891]] | |93 |[[1159, 5.2284937], [1631, 4.815165], [593, 4.7070503], [1260, 4.545691], [867, 4.5303392]] | |95 |[[113, 4.6072245], [1449, 4.5394883], [963, 4.5030937], [64, 4.4677086], [50, 4.4171376]] | |97 |[[1367, 5.010781], 
[1500, 4.9956975], [187, 4.8442287], [1169, 4.812822], [169, 4.795765]] | |101 |[[1169, 4.1324263], [1664, 4.000803], [174, 3.8331304], [96, 3.8325744], [1612, 3.8184724]] | +------+--------------------------------------------------------------------------------------------+ only showing top 20 rows +-------+------------------------------------------------------------------------------------------+ |movieId|recommendations | +-------+------------------------------------------------------------------------------------------+ |12 |[[810, 5.5758796], [118, 5.455514], [366, 5.4393682], [688, 5.216093], [640, 5.2092514]] | |13 |[[636, 4.49083], [810, 4.4198904], [118, 4.393448], [115, 4.3127003], [794, 4.229851]] | |14 |[[765, 4.960431], [362, 4.9018764], [519, 4.8501167], [147, 4.805251], [469, 4.768457]] | |18 |[[300, 4.402146], [581, 4.2777433], [695, 4.259108], [805, 4.192784], [567, 4.1795974]] | |25 |[[688, 4.8088303], [341, 4.7556705], [849, 4.576836], [628, 4.55899], [810, 4.5253415]] | |37 |[[695, 3.7592285], [239, 3.6988673], [805, 3.68078], [842, 3.6733208], [342, 3.6663287]] | |38 |[[127, 4.405499], [810, 4.337131], [507, 4.2949586], [38, 4.222893], [939, 4.2148314]] | |46 |[[770, 4.682896], [928, 4.673797], [688, 4.652969], [424, 4.637449], [783, 4.623057]] | |50 |[[810, 5.8363447], [640, 5.3496075], [118, 5.31877], [152, 5.298847], [849, 5.2969685]] | |52 |[[928, 4.5877066], [173, 4.5797167], [366, 4.532594], [688, 4.476476], [118, 4.458826]] | |56 |[[366, 5.9010973], [808, 5.384524], [636, 5.3037505], [408, 5.290825], [118, 5.2329607]] | |65 |[[628, 4.8349013], [688, 4.6787667], [270, 4.601569], [362, 4.513479], [427, 4.473649]] | |67 |[[137, 4.3174486], [810, 4.29837], [887, 4.2589283], [507, 4.238134], [534, 4.144943]] | |70 |[[688, 4.931924], [174, 4.7700405], [810, 4.6216316], [849, 4.585553], [173, 4.54105]] | |73 |[[688, 4.858781], [810, 4.835411], [939, 4.554215], [507, 4.5320325], [907, 4.506058]] | |83 |[[849, 5.4517775], [688, 5.3891783], [810, 
5.293448], [939, 5.272237], [270, 5.221407]] | |93 |[[173, 4.9229717], [118, 4.768116], [219, 4.73464], [697, 4.701845], [148, 4.677616]] | |95 |[[688, 5.265843], [810, 5.249349], [939, 5.0150166], [849, 4.9963374], [907, 4.8846884]] | |97 |[[810, 4.8926907], [507, 4.811473], [688, 4.723339], [808, 4.709265], [260, 4.6865244]] | |101 |[[366, 5.3238587], [808, 4.8618436], [777, 4.8359947], [408, 4.7627087], [287, 4.6841784]]| +-------+------------------------------------------------------------------------------------------+ only showing top 20 rows +------+--------------------------------------------------------------------------------------------+ |userId|recommendations | +------+--------------------------------------------------------------------------------------------+ |196 |[[1643, 5.256925], [1463, 4.8774548], [1449, 4.7927065], [113, 4.7131853], [515, 4.5575466]]| +------+--------------------------------------------------------------------------------------------+ +-------+------------------------------------------------------------------------------------------+ |movieId|recommendations | +-------+------------------------------------------------------------------------------------------+ |242 |[[810, 4.9061694], [440, 4.8700376], [928, 4.8635535], [173, 4.8467245], [688, 4.8181868]]| +-------+------------------------------------------------------------------------------------------+