根据集体智慧编程第二章内容,运用欧几里得距离算法或者皮尔逊相关系数算法,可以在数据库(SQL Server)中实现一个简单的推荐系统。
项目背景:
假设现在有一组来自基金销售网站的数据,记录了投资者购买基金的品种和购买的数量占该基金发售总量的百分比,我们可以利用这组数据为购买者提供一份推荐购买的基金列表。
数据准备:
1 CREATE TABLE tbl_Fund(
2 UserID int NULL,
3 ItemID int NULL,
4 Score decimal(15,5) NULL)
5 GO
6
7 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,101,42.00000)
8 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,101,5.00000)
9 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,101,4.00000)
10 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10047,101,2.00000)
11 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,101,2.00000)
12 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,101,3.00000)
13 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10073,101,5.00000)
14 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10097,101,5.00000)
15 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10104,101,5.00000)
16 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10111,101,5.00000)
17 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10112,101,5.00000)
18 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10113,101,5.00000)
19 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10139,101,5.00000)
20 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10157,101,5.00000)
21 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,104,10.00000)
22 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,104,2.50000)
23 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10094,104,1.00000)
24 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10098,104,2.50000)
25 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10099,104,2.50000)
26 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10100,104,1.00000)
27 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10105,104,2.50000)
28 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10106,104,5.00000)
29 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10107,104,2.50000)
30 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10110,104,2.50000)
31 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10117,104,2.50000)
32 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10134,104,2.50000)
33 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10182,104,2.50000)
34 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10184,104,1.00000)
35 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10185,104,5.00000)
36 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10188,104,2.00000)
37 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,113,5.00000)
38 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,113,75.00000)
39 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,114,3.33333)
40 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,114,3.33333)
41 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,114,1.66667)
42 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10060,114,6.66667)
43 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10119,114,1.66667)
44 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10121,114,2.40000)
45 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10131,114,3.33333)
46 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10135,114,1.66667)
47 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10141,114,3.33333)
48 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,115,6.00000)
49 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,115,10.00000)
50 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,119,10.00000)
51 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10067,121,5.00000)
52 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,122,5.00000)
53 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,122,2.50000)
54 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10054,122,5.00000)
55 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,131,10.00000)
56 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10282,131,10.00000)
57 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,132,25.00000)
58 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10072,132,25.00000)
59 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10058,134,0.55556)
60 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10160,134,1.11111)
61 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,138,0.75000)
62 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,138,0.50000)
63 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,138,1.00000)
64 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,138,0.25000)
65 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,138,0.25000)
66 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,138,0.50000)
67 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10051,138,0.20000)
68 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10076,138,0.25000)
69 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10091,138,0.25000)
70 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10094,138,0.10000)
71 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10123,138,0.25000)
72 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10126,138,0.25000)
73 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,140,1.10000)
74 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,140,0.50000)
75 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,140,0.50000)
76 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10075,140,0.50000)
77 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10077,140,2.00000)
78 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10078,140,0.50000)
79 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10079,140,2.00000)
80 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10080,140,1.00000)
81 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10081,140,0.50000)
82 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10082,140,2.00000)
83 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10086,140,5.00000)
84 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10087,140,0.50000)
85 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10088,140,2.00000)
86 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10090,140,0.50000)
87 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10096,140,0.50000)
88 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10101,140,0.50000)
89 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10102,140,5.00000)
90 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10103,140,1.00000)
91 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10108,140,2.00000)
92 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10120,140,1.00000)
93 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10156,140,1.00000)
94 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10034,142,0.55556)
95 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,156,0.25000)
96 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10122,158,1.66667)
97 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,162,5.00000)
98 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,184,1.00000)
99 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10164,184,4.00000)
100 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10171,184,4.00000)
101 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10176,184,2.00000)
102 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10183,184,2.00000)
103 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10186,184,2.00000)
104 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10189,184,2.00000)
105 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10190,184,10.00000)
106 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10191,184,2.00000)
107 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10192,184,2.00000)
108 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,185,0.25000)
109 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10166,185,1.50000)
110 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10167,185,2.00000)
111 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10168,185,0.25000)
112 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10170,185,1.00000)
113 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10174,185,3.00000)
114 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10178,185,0.50000)
115 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10179,185,0.25000)
116 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10193,185,0.40000)
117 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,189,10.00000)
118 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10219,199,100.00000)
119 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,208,0.50000)
120 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,208,0.50000)
121 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,208,0.50000)
122 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,215,1.25000)
123 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10195,215,1.00000)
124 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,222,0.08621)
125 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,234,1.11111)
126 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10084,234,5.55556)
127 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,236,2.50000)
128 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,236,5.00000)
129 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,236,2.50000)
130 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10094,236,20.00000)
131 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10117,236,4.00000)
132 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10210,236,2.50000)
133 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10211,236,5.00000)
134 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,238,0.38462)
135 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10248,238,1.53846)
136 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10307,238,0.38462)
137 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,251,0.50000)
138 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,251,2.50000)
139 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,260,2.00000)
140 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,260,10.00000)
141 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10034,260,20.00000)
142 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10052,260,5.00000)
143 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,260,5.00000)
144 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10062,260,10.00000)
145 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10073,260,20.00000)
146 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10104,260,20.00000)
147 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10230,260,10.00000)
148 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10231,260,10.00000)
149 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10232,260,3.00000)
150 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10233,260,20.00000)
151 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10234,260,20.00000)
152 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10235,260,10.00000)
153 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10236,260,20.00000)
154 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10247,260,10.00000)
155 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,269,0.33333)
156 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,269,1.33333)
157 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10251,269,1.33333)
158 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10272,269,1.33333)
159 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10277,269,1.33333)
160 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10278,269,0.66667)
161 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10335,269,1.33333)
162 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10337,269,1.33333)
163 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,306,0.16667)
164 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10011,306,0.66667)
165 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,306,0.33333)
166 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,306,0.16667)
167 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10347,306,0.66667)
168 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10415,306,0.33333)
169 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10469,306,0.33333)
170 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10487,306,0.66667)
171 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10502,306,3.33333)
172 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10505,306,0.16667)
173 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10511,306,0.33333)
174 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10515,306,3.33333)
175 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10535,306,0.33333)
176 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10538,306,6.66667)
177 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10542,306,0.06667)
178 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10545,306,1.00000)
179 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10556,306,0.16667)
180 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10622,306,1.00000)
181 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10626,306,0.66667)
182 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10627,306,1.00000)
183 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10263,307,0.31250)
184 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10363,307,0.25000)
185 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10369,307,1.25000)
186 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10460,307,1.25000)
187 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10309,359,2.50000)
188 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10325,384,1.25000)
189 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10359,417,1.42857)
190 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10366,419,1.66667)
191 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10396,437,0.11905)
192 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,474,0.50000)
193 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10473,522,0.11111)
194 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,527,1.00000)
195 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10523,529,0.51282)
196 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10522,537,0.49652)
197 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,560,16.66667)
198 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10521,560,16.66667)
199 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10039,572,51.25000)
200 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10085,572,0.25000)
201 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10540,572,5.00000)
202 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10544,572,0.25000)
203 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10549,576,0.62500)
204 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,601,0.12500)
205 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,601,0.12500)
206 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10575,601,0.50000)
207 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10589,601,0.50000)
208 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10591,601,0.12500)
209 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10594,601,0.12500)
210 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10600,601,0.10000)
211 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10619,601,0.50000)
212 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10635,601,0.12500)
213 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10616,603,1.00000)
214 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10381,607,0.45455)
215 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10586,617,0.01000)
216 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10601,629,0.25000)
217 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,640,0.16667)
218 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10643,658,0.83333)
219 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10673,658,0.83333)
220 GO
相似度的SQL实现:
首先实现一个为指定用户寻找相似度最高的前20名用户的存储过程。其中@User为指定用户ID,@Method = 1对应于欧几里得距离算法,2对应于皮尔逊相关系数算法。
1 CREATE PROCEDURE usp_User_GetNeighborhoodForUser 2 @User int, 3 @Method tinyint = 1 4 AS 5 /* 6 @Method 7 1,Euclidean Distance Score 8 2,Pearson Correlation Score 9 */ 10 SET NOCOUNT ON 11 12 CREATE TABLE #Result ( 13 UserID int, 14 SimValue decimal(20,5) 15 ) 16 17 IF @Method = 1 18 BEGIN 19 INSERT INTO #Result 20 SELECT t2.UserID, 21 sim = 1.0 /(SQRT(SUM(POWER(t1.Score - t2.Score,2))) + 1) 22 FROM tbl_Fund t1 23 INNER JOIN tbl_Fund t2 24 ON t1.UserID = @User 25 AND t1.ItemID = t2.ItemID 26 AND t2.UserID <> @User 27 GROUP BY t2.UserID 28 END 29 ELSE 30 BEGIN 31 INSERT INTO #Result 32 SELECT UserID, 33 (pSum - (Sum1*Sum2 / n)) / CASE WHEN (Sum1Sq - POWER(Sum1,2) / n) * (Sum2Sq - POWER(Sum2,2) / n) = 0 34 THEN NULL ELSE SQRT((Sum1Sq - POWER(Sum1,2) / n)*(Sum2Sq - POWER(Sum2,2) / n)) END 35 FROM ( 36 SELECT t2.UserID, 37 pSum = SUM(t1.Score * t2.Score), 38 Sum1 = SUM(t1.Score), 39 Sum2 = SUM(t2.Score), 40 n = COUNT(t2.ItemID), 41 Sum1Sq = SUM(POWER(t1.Score,2)), 42 Sum2Sq = SUM(POWER(t2.Score,2)) 43 FROM tbl_Fund t1 44 INNER JOIN tbl_Fund t2 45 ON t1.UserID = @User 46 AND t1.ItemID = t2.ItemID 47 AND t2.UserID <> @User 48 GROUP BY t2.UserID 49 ) A 50 END 51 52 SELECT TOP 20 UserID, 53 SimValue 54 FROM #Result 55 ORDER BY SimValue DESC 56 57 DROP TABLE #Result 58 59 SET NOCOUNT OFF
为用户10009寻找相似度高的用户。
EXEC usp_User_GetNeighborhoodForUser @User = 10009,@Method= 1 EXEC usp_User_GetNeighborhoodForUser @User = 10009,@Method= 2
计算推荐的SQL实现:
计算推荐的方法同样参考自集体智慧编程。首先找到指定用户未购买的基金,其次用找到的相似度值乘以每位购买者购买的百分比,最后将该值求和后除以相似度的和得到最后的推荐值。推荐值越高的基金将可能被该指定用户购买。
1 CREATE PROCEDURE usp_User_GetRecommendedItemsForUser 2 @User int, 3 @Method tinyint = 1 4 AS 5 /* 6 @Method 7 1,Euclidean Distance Score 8 2,Pearson Correlation Score 9 10 */ 11 SET NOCOUNT ON 12 13 CREATE TABLE #Neighborhood ( 14 UserID int, 15 SimValue decimal(20,5) 16 ) 17 18 INSERT INTO #Neighborhood 19 EXEC usp_User_GetNeighborhoodForUser 20 @User = @User, 21 @Method = @Method 22 23 SELECT TOP 5 ItemID, 24 SimValue 25 FROM ( 26 SELECT f.ItemID, 27 SimValue = SUM(f.Score * n.SimValue) * 1.0 / SUM(n.SimValue) 28 FROM tbl_Fund f 29 INNER JOIN #Neighborhood n 30 ON n.UserID = f.UserID 31 WHERE f.UserID <> @User 32 AND NOT EXISTS (SELECT 1 FROM tbl_Fund WHERE UserID = @User AND ItemID = f.ItemID) 33 AND n.SimValue > 0 34 GROUP BY f.ItemID 35 ) A 36 ORDER BY SimValue DESC 37 38 DROP TABLE #Neighborhood 39 40 SET NOCOUNT OFF
实例:
为用户10009分别使用欧几里得和皮尔逊得到推荐产品,可以发现虽然结果不是完全相同,但是113,132,131是两种方法都推荐的产品,有一定的参考意义。
1 EXEC usp_User_GetRecommendedItemsForUser @User = 10009,@Method= 2 2 EXEC usp_User_GetRecommendedItemsForUser @User = 10009,@Method= 1
补充:
将集体智慧编程中的数据导入后,为用户Toby(Userid = 7)推荐电影,不同的方法将得到一样结果。是不是说明用购买基金的百分比作为参考值不科学?!
INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,1,2.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,2,3.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,3,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,4,3.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,5,2.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,6,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,1,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,2,3.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,3,1.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,4,5.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,5,3.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,6,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,1,2.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,2,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,1,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,4,3.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,6,4.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,2,3.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,3,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,4,4.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,5,2.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,6,4.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,1,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,2,4.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,4,5.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,5,3.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,6,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (7,2,4.50000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (7,4,4.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (7,5,1.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,2,4.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,3,2.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,4,3.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,5,2.00000) INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,6,3.00000) GO
EXEC usp_User_GetRecommendedItemsForUser @User = 7,@Method= 1 EXEC usp_User_GetRecommendedItemsForUser @User = 7,@Method= 2