• 简单推荐系统的SQL实现


    根据集体智慧编程第二章内容,运用欧几里得距离算法或者皮尔逊相关系数算法,可以在数据库(SQL Server)中实现一个简单的推荐系统。

    项目背景:

    假设现在有一组来自基金销售网站的数据,记录了投资者购买基金的品种和购买的数量占该基金发售总量的百分比,我们可以利用这组数据为购买者提供一份推荐购买的基金列表。

    数据准备:

      1 CREATE TABLE tbl_Fund(
      2     UserID int NULL,
      3     ItemID int NULL,
      4     Score decimal(15,5) NULL)
      5 GO
      6 
      7 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,101,42.00000)
      8 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,101,5.00000)
      9 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,101,4.00000)
     10 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10047,101,2.00000)
     11 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,101,2.00000)
     12 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,101,3.00000)
     13 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10073,101,5.00000)
     14 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10097,101,5.00000)
     15 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10104,101,5.00000)
     16 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10111,101,5.00000)
     17 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10112,101,5.00000)
     18 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10113,101,5.00000)
     19 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10139,101,5.00000)
     20 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10157,101,5.00000)
     21 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,104,10.00000)
     22 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,104,2.50000)
     23 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10094,104,1.00000)
     24 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10098,104,2.50000)
     25 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10099,104,2.50000)
     26 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10100,104,1.00000)
     27 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10105,104,2.50000)
     28 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10106,104,5.00000)
     29 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10107,104,2.50000)
     30 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10110,104,2.50000)
     31 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10117,104,2.50000)
     32 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10134,104,2.50000)
     33 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10182,104,2.50000)
     34 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10184,104,1.00000)
     35 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10185,104,5.00000)
     36 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10188,104,2.00000)
     37 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,113,5.00000)
     38 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,113,75.00000)
     39 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,114,3.33333)
     40 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,114,3.33333)
     41 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,114,1.66667)
     42 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10060,114,6.66667)
     43 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10119,114,1.66667)
     44 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10121,114,2.40000)
     45 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10131,114,3.33333)
     46 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10135,114,1.66667)
     47 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10141,114,3.33333)
     48 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,115,6.00000)
     49 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,115,10.00000)
     50 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,119,10.00000)
     51 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10067,121,5.00000)
     52 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,122,5.00000)
     53 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,122,2.50000)
     54 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10054,122,5.00000)
     55 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,131,10.00000)
     56 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10282,131,10.00000)
     57 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,132,25.00000)
     58 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10072,132,25.00000)
     59 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10058,134,0.55556)
     60 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10160,134,1.11111)
     61 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,138,0.75000)
     62 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,138,0.50000)
     63 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,138,1.00000)
     64 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,138,0.25000)
     65 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,138,0.25000)
     66 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,138,0.50000)
     67 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10051,138,0.20000)
     68 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10076,138,0.25000)
     69 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10091,138,0.25000)
     70 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10094,138,0.10000)
     71 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10123,138,0.25000)
     72 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10126,138,0.25000)
     73 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,140,1.10000)
     74 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,140,0.50000)
     75 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,140,0.50000)
     76 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10075,140,0.50000)
     77 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10077,140,2.00000)
     78 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10078,140,0.50000)
     79 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10079,140,2.00000)
     80 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10080,140,1.00000)
     81 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10081,140,0.50000)
     82 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10082,140,2.00000)
     83 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10086,140,5.00000)
     84 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10087,140,0.50000)
     85 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10088,140,2.00000)
     86 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10090,140,0.50000)
     87 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10096,140,0.50000)
     88 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10101,140,0.50000)
     89 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10102,140,5.00000)
     90 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10103,140,1.00000)
     91 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10108,140,2.00000)
     92 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10120,140,1.00000)
     93 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10156,140,1.00000)
     94 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10034,142,0.55556)
     95 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,156,0.25000)
     96 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10122,158,1.66667)
     97 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,162,5.00000)
     98 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,184,1.00000)
     99 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10164,184,4.00000)
    100 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10171,184,4.00000)
    101 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10176,184,2.00000)
    102 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10183,184,2.00000)
    103 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10186,184,2.00000)
    104 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10189,184,2.00000)
    105 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10190,184,10.00000)
    106 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10191,184,2.00000)
    107 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10192,184,2.00000)
    108 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,185,0.25000)
    109 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10166,185,1.50000)
    110 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10167,185,2.00000)
    111 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10168,185,0.25000)
    112 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10170,185,1.00000)
    113 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10174,185,3.00000)
    114 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10178,185,0.50000)
    115 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10179,185,0.25000)
    116 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10193,185,0.40000)
    117 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,189,10.00000)
    118 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10219,199,100.00000)
    119 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,208,0.50000)
    120 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,208,0.50000)
    121 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,208,0.50000)
    122 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,215,1.25000)
    123 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10195,215,1.00000)
    124 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,222,0.08621)
    125 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,234,1.11111)
    126 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10084,234,5.55556)
    127 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,236,2.50000)
    128 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10009,236,5.00000)
    129 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,236,2.50000)
    130 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10094,236,20.00000)
    131 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10117,236,4.00000)
    132 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10210,236,2.50000)
    133 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10211,236,5.00000)
    134 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,238,0.38462)
    135 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10248,238,1.53846)
    136 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10307,238,0.38462)
    137 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,251,0.50000)
    138 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10050,251,2.50000)
    139 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,260,2.00000)
    140 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,260,10.00000)
    141 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10034,260,20.00000)
    142 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10052,260,5.00000)
    143 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10061,260,5.00000)
    144 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10062,260,10.00000)
    145 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10073,260,20.00000)
    146 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10104,260,20.00000)
    147 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10230,260,10.00000)
    148 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10231,260,10.00000)
    149 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10232,260,3.00000)
    150 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10233,260,20.00000)
    151 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10234,260,20.00000)
    152 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10235,260,10.00000)
    153 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10236,260,20.00000)
    154 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10247,260,10.00000)
    155 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,269,0.33333)
    156 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10035,269,1.33333)
    157 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10251,269,1.33333)
    158 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10272,269,1.33333)
    159 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10277,269,1.33333)
    160 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10278,269,0.66667)
    161 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10335,269,1.33333)
    162 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10337,269,1.33333)
    163 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,306,0.16667)
    164 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10011,306,0.66667)
    165 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,306,0.33333)
    166 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,306,0.16667)
    167 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10347,306,0.66667)
    168 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10415,306,0.33333)
    169 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10469,306,0.33333)
    170 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10487,306,0.66667)
    171 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10502,306,3.33333)
    172 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10505,306,0.16667)
    173 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10511,306,0.33333)
    174 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10515,306,3.33333)
    175 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10535,306,0.33333)
    176 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10538,306,6.66667)
    177 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10542,306,0.06667)
    178 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10545,306,1.00000)
    179 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10556,306,0.16667)
    180 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10622,306,1.00000)
    181 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10626,306,0.66667)
    182 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10627,306,1.00000)
    183 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10263,307,0.31250)
    184 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10363,307,0.25000)
    185 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10369,307,1.25000)
    186 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10460,307,1.25000)
    187 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10309,359,2.50000)
    188 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10325,384,1.25000)
    189 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10359,417,1.42857)
    190 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10366,419,1.66667)
    191 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10396,437,0.11905)
    192 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,474,0.50000)
    193 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10473,522,0.11111)
    194 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,527,1.00000)
    195 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10523,529,0.51282)
    196 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10522,537,0.49652)
    197 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10015,560,16.66667)
    198 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10521,560,16.66667)
    199 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10039,572,51.25000)
    200 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10085,572,0.25000)
    201 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10540,572,5.00000)
    202 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10544,572,0.25000)
    203 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10549,576,0.62500)
    204 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,601,0.12500)
    205 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10017,601,0.12500)
    206 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10575,601,0.50000)
    207 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10589,601,0.50000)
    208 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10591,601,0.12500)
    209 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10594,601,0.12500)
    210 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10600,601,0.10000)
    211 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10619,601,0.50000)
    212 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10635,601,0.12500)
    213 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10616,603,1.00000)
    214 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10381,607,0.45455)
    215 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10586,617,0.01000)
    216 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10601,629,0.25000)
    217 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10007,640,0.16667)
    218 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10643,658,0.83333)
    219 INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (10673,658,0.83333)
    220 GO
    View Code

    相似度的SQL实现:

    首先实现一个为指定用户寻找相似度最高的前20名用户的存储过程。其中@User为指定用户ID,@Method = 1对应于欧几里得距离算法,2对应于皮尔逊相关系数算法。

     1 CREATE PROCEDURE usp_User_GetNeighborhoodForUser
     2 @User int,
     3 @Method tinyint = 1
     4 AS
     5 /*
     6 @Method
     7 1,Euclidean Distance Score 
     8 2,Pearson Correlation Score
     9 */
    10 SET NOCOUNT ON 
    11 
    12 CREATE TABLE #Result (
    13     UserID int,
    14     SimValue decimal(20,5)
    15 )
    16 
    17 IF @Method = 1
    18 BEGIN
    19     INSERT INTO #Result
    20     SELECT t2.UserID,
    21         sim = 1.0 /(SQRT(SUM(POWER(t1.Score - t2.Score,2))) + 1)
    22     FROM tbl_Fund t1
    23     INNER JOIN tbl_Fund t2
    24     ON t1.UserID = @User
    25         AND t1.ItemID = t2.ItemID
    26         AND t2.UserID <> @User
    27     GROUP BY t2.UserID
    28 END
    29 ELSE
    30 BEGIN
    31     INSERT INTO #Result
    32         SELECT UserID,
    33     (pSum - (Sum1*Sum2 / n)) / CASE WHEN (Sum1Sq - POWER(Sum1,2) / n) * (Sum2Sq - POWER(Sum2,2) / n) = 0 
    34             THEN NULL ELSE SQRT((Sum1Sq - POWER(Sum1,2) / n)*(Sum2Sq - POWER(Sum2,2) / n)) END
    35     FROM (
    36         SELECT t2.UserID,
    37             pSum = SUM(t1.Score * t2.Score),
    38             Sum1 = SUM(t1.Score),
    39             Sum2 = SUM(t2.Score),
    40             n = COUNT(t2.ItemID),
    41             Sum1Sq = SUM(POWER(t1.Score,2)),
    42             Sum2Sq = SUM(POWER(t2.Score,2))
    43         FROM tbl_Fund t1
    44         INNER JOIN tbl_Fund t2
    45         ON t1.UserID = @User
    46             AND t1.ItemID = t2.ItemID
    47             AND t2.UserID <> @User
    48         GROUP BY t2.UserID
    49     ) A
    50 END
    51 
    52 SELECT TOP 20 UserID,
    53     SimValue
    54 FROM #Result
    55 ORDER BY SimValue DESC
    56 
    57 DROP TABLE #Result
    58 
    59 SET NOCOUNT OFF

    为用户10009寻找相似度高的用户。

    EXEC usp_User_GetNeighborhoodForUser @User = 10009,@Method= 1
    EXEC usp_User_GetNeighborhoodForUser @User = 10009,@Method= 2

    计算推荐的SQL实现:

    计算推荐的方法同样参考自集体智慧编程。首先找到指定用户未购买的基金,其次用找到的相似度值乘以每位购买者购买的百分比,最后将该值求和后除以相似度的和得到最后的推荐值。推荐值越高的基金将可能被该指定用户购买。

     1 CREATE PROCEDURE usp_User_GetRecommendedItemsForUser
     2 @User int,
     3 @Method tinyint = 1
     4 AS
     5 /*
     6 @Method
     7 1,Euclidean Distance Score 
     8 2,Pearson Correlation Score
     9 
    10 */
    11 SET NOCOUNT ON 
    12 
    13 CREATE TABLE #Neighborhood (
    14     UserID int,
    15     SimValue decimal(20,5)
    16 )
    17 
    18 INSERT INTO #Neighborhood
    19 EXEC usp_User_GetNeighborhoodForUser
    20 @User = @User,
    21 @Method = @Method
    22 
    23 SELECT TOP 5 ItemID,
    24     SimValue
    25 FROM (
    26     SELECT f.ItemID, 
    27         SimValue = SUM(f.Score * n.SimValue) * 1.0 / SUM(n.SimValue)
    28     FROM tbl_Fund f
    29     INNER JOIN #Neighborhood n
    30     ON n.UserID = f.UserID
    31     WHERE f.UserID <> @User
    32         AND NOT EXISTS (SELECT 1 FROM tbl_Fund WHERE UserID = @User AND ItemID = f.ItemID)
    33         AND n.SimValue > 0
    34     GROUP BY f.ItemID
    35 ) A
    36 ORDER BY SimValue DESC
    37 
    38 DROP TABLE #Neighborhood
    39 
    40 SET NOCOUNT OFF

     实例:

    为用户10009分别使用欧几里得和皮尔逊得到推荐产品,可以发现虽然结果不是完全相同,但是113,132,131是两种方法都推荐的产品,有一定的参考意义。

    1 EXEC usp_User_GetRecommendedItemsForUser @User = 10009,@Method= 2
    2 EXEC usp_User_GetRecommendedItemsForUser @User = 10009,@Method= 1

    补充:

    将集体智慧编程中的数据导入后,为用户Toby(Userid = 7)推荐电影,不同的方法将得到一样结果。是不是说明用购买基金的百分比作为参考值不科学?!

    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,1,2.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,2,3.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,3,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,4,3.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,5,2.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (1,6,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,1,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,2,3.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,3,1.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,4,5.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,5,3.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (2,6,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,1,2.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,2,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,1,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,4,3.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (3,6,4.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,2,3.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,3,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,4,4.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,5,2.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (4,6,4.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,1,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,2,4.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,4,5.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,5,3.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (6,6,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (7,2,4.50000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (7,4,4.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (7,5,1.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,2,4.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,3,2.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,4,3.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,5,2.00000)
    INSERT INTO tbl_Fund(UserID,ItemID,Score) VALUES (5,6,3.00000)
    GO
    View Code
    EXEC usp_User_GetRecommendedItemsForUser @User = 7,@Method= 1
    EXEC usp_User_GetRecommendedItemsForUser @User = 7,@Method= 2

  • 相关阅读:
    ps 批量杀死进程
    福特F-550 4x4 越野房车设计方案欣赏_房车欣赏_21世纪房车网
    geoserver技术交流群
    JAVA 中基本数组类型转byte数组
    JAVA 图片png和jpeg格式的转化方式
    win10下安装postgresql10 出现Problem running post-install step. Installation may not complete correctly.The database cluster initialisation failed
    centos7安装postgresql和postgis
    jdk1.8的jvm参数的查看以及GC日志的分析
    [warn] Neither build.sbt nor a 'project' directory in the current directory: C:Usershgt
    centos7使用docker制作tomcat本地镜像
  • 原文地址:https://www.cnblogs.com/cxy486/p/4095405.html
Copyright © 2020-2023  润新知