2016-05-25
18号写了一个名为p_etl_social_scan的过程,其中用到了40多个union,语句显得非常臃肿、累赘、冗长,因而本次通过嵌套3层循环,对该过程进行优化。外层循环是对网站这个字段进行遍历,中层循环是对品牌关键词这个字段进行遍历,内层循环是对游标取值进行循环。通过3层嵌套循环,将原本2000多行的代码,一下子缩减到了1000行不到,并且在后期对网站、品牌关键词的增改上,变得方便很多了。
1、视频sp:p_etl_media_v2
-- AUTHORS:cenliang -- CREATED_DATE:2016-05-25 /*********************** 视频网站 ********************************/ -- DROP PROCEDURE -- IF EXISTS p_etl_media_v2; -- delimiter // -- -- -- CREATE DEFINER = `root`@`%` PROCEDURE `p_etl_media_v2` ( -- IN v_dayid_first INT, -- IN v_dayid_end INT -- ) BEGIN -- 采集日期 DECLARE v_sys_date INT (10); -- 文件类型 DECLARE v_file_type VARCHAR (7); -- 网站名称 DECLARE v_website VARCHAR (50); -- 品牌关键词 DECLARE v_keyword VARCHAR (50); -- 视频播放url、(图片详情url,新需求) DECLARE v_watch_url VARCHAR (250) DEFAULT NULL; -- 视频下载url、图片下载url DECLARE v_download_url VARCHAR (250) DEFAULT NULL; -- 上传日期 DECLARE v_upload_date VARCHAR (50) DEFAULT NULL; -- 声量(综合排名) DECLARE v_volume INT (11) DEFAULT 0; -- 视频评论数、图片评论数 DECLARE v_comment_num INT (11) DEFAULT NULL; -- 视频点赞数、图片喜欢数 DECLARE v_dianzan_num INT (11) DEFAULT NULL; -- 视频收藏数、图片收藏数 DECLARE v_collect_num INT (11) DEFAULT NULL; -- 视频分享数、图片转发数(目前微博有!) DECLARE v_forward_num INT (11) DEFAULT NULL; -- 视频播放数 DECLARE v_watch_num INT (11) DEFAULT NULL; -- 视频弹幕数(目前B站有!) DECLARE v_tanmu_num INT (11) DEFAULT NULL; -- 视频网络指数(目前优酷有!) DECLARE v_net_num INT (11) DEFAULT NULL; -- 视频标题 DECLARE v_title VARCHAR (250) DEFAULT NULL; -- 视频封面 DECLARE v_cover_pic VARCHAR (250) DEFAULT NULL; -- 视频播放时长 DECLARE v_stay_time VARCHAR (50) DEFAULT NULL; -- 视频真实下载到本地的文件名 DECLARE v_media_file_name VARCHAR (250) DEFAULT NULL; -- 存储路径(不包括文件名,只是上级路径) DECLARE v_save_dir VARCHAR (250) DEFAULT NULL; -- 是否视频已经下架找不到 DECLARE v_video_not_found INT (11) DEFAULT 0; -- 标签浏览数 DECLARE v_tag_browse_num INT (11) DEFAULT NULL; -- 标签图片数 DECLARE v_tag_pic_num INT (11) DEFAULT NULL; -- 标签关注数 DECLARE v_tag_follow_num INT (11) DEFAULT NULL; -- 标签热度 DECLARE v_tag_hot_num INT (11) DEFAULT NULL; -- 标签名 DECLARE v_tag_name VARCHAR (50) DEFAULT NULL; -- 图片名 DECLARE v_pic_name VARCHAR (50) DEFAULT NULL; -- 标签详情 DECLARE v_tag_info VARCHAR (1000) DEFAULT NULL; -- 图片详情 DECLARE v_pic_info VARCHAR (250) DEFAULT NULL; -- 活动 DECLARE v_activity VARCHAR (250) DEFAULT NULL; -- 用户名 DECLARE v_user_name VARCHAR (250) DEFAULT NULL; -- 用户id DECLARE v_user_id VARCHAR (250) DEFAULT NULL; -- 是否达到游标读取记录末尾的,控制变量 DECLARE var INT DEFAULT 0; -- 网站循环的指标变量:1、2、3、4、5、6、7 DECLARE v_website_index INT; -- 网站变量:美拍、搜狐、腾讯、优酷、乐视、bilibili、爱奇艺 DECLARE v_website_str VARCHAR (255); -- 关键词循环的指标变量:1、2、3、4 DECLARE v_keyword_index INT; -- 关键词变量:欧莱雅、美宝莲、美即、NYX DECLARE v_keyword_str VARCHAR (255); DECLARE cur CURSOR FOR SELECT DISTINCT REPLACE (LEFT(cralwTime, 10), '-', '') AS sys_date, 'media' AS file_type, source AS website, keyword AS keyword, watchUrl AS watch_url, downUrl AS download_url, REPLACE (uploadTime, '-', '') AS upload_date, ROUND( IFNULL( ( POWER(IFNULL(watchCount, 0), 2) + POWER(IFNULL(commentCount, 0), 2) + POWER(IFNULL(zanCount, 0), 2) + POWER(IFNULL(netCount, 0), 2) + POWER(IFNULL(favCount, 0), 2) + POWER(IFNULL(tanmuCount, 0), 2) + POWER(IFNULL(shareCount, 0), 2) ) / ( IFNULL(watchCount, 0) + IFNULL(commentCount, 0) + IFNULL(zanCount, 0) + IFNULL(netCount, 0) + IFNULL(favCount, 0) + IFNULL(tanmuCount, 0) + IFNULL(shareCount, 0) ), 0 ), 0 ) AS volume, IFNULL(commentCount, 0) AS comment_num, IFNULL(zanCount, 0) AS dianzan_num, IFNULL(favCount, 0) AS collect_num, 1 / 0 AS forward_num, IFNULL(watchCount, 0) AS watch_num, ( CASE v_website_str WHEN 'bilibili' THEN tanmuCount ELSE 1 / 0 END ) AS tanmu_num, ( CASE v_website_str WHEN '优酷' THEN netCount ELSE 1 / 0 END ) AS net_num, title AS title, coverPic AS cover_pic, length AS stay_time, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found, 1 / 0 AS tag_browse_num, 1 / 0 AS tag_pic_num, 1 / 0 AS tag_follow_num, 1 / 0 AS tag_hot_num, 1 / 0 AS tag_name, 1 / 0 AS pic_name, 1 / 0 AS tag_info, 1 / 0 AS pic_info, 1 / 0 AS activity, 1 / 0 AS user_name, 1 / 0 AS user_id FROM media.7videos WHERE source = v_website_str AND keyword = v_keyword_str AND watchUrl IS NOT NULL AND watchUrl <> '' AND ( title LIKE '%美宝莲%' OR title LIKE '%欧莱雅%' OR title LIKE '%美即%' OR title LIKE '%NYX%' ) AND REPLACE (LEFT(cralwTime, 10), '-', '') >= v_dayid_first AND REPLACE (LEFT(cralwTime, 10), '-', '') < v_dayid_end GROUP BY REPLACE (LEFT(cralwTime, 10), '-', ''), source, keyword, watchUrl ORDER BY volume DESC LIMIT 25; /*是否达到记录的末尾控制变量*/ DECLARE CONTINUE HANDLER FOR NOT FOUND SET var = 1; UPDATE media.7videos SET keyword = '美宝莲' WHERE ( keyword LIKE '%美宝莲%' OR keyword LIKE '%Maybeline%' ); UPDATE media.7videos SET keyword = '欧莱雅' WHERE ( keyword LIKE '%欧莱雅%' OR keyword LIKE '%Loreal%' ); UPDATE media.7videos SET keyword = '美即' WHERE ( keyword LIKE '%美即%' OR keyword LIKE '%MG%' ); UPDATE media.7videos SET keyword = 'NYX' WHERE keyword LIKE '%NYX%'; SET v_website_index = 1; SET v_website_str = ''; WHILE v_website_index <= 7 DO SELECT CASE v_website_index WHEN 1 THEN '美拍' WHEN 2 THEN '搜狐' WHEN 3 THEN '腾讯' WHEN 4 THEN '优酷' WHEN 5 THEN '乐视' WHEN 6 THEN 'bilibili' WHEN 7 THEN '爱奇艺' ELSE 'none' END INTO v_website_str; SET v_keyword_index = 1; SET v_keyword_str = ''; WHILE v_keyword_index <= 4 DO SELECT CASE v_keyword_index WHEN 1 THEN '欧莱雅' WHEN 2 THEN '美宝莲' WHEN 3 THEN '美即' WHEN 4 THEN 'NYX' ELSE 'none' END INTO v_keyword_str; OPEN cur; FETCH cur INTO v_sys_date, v_file_type, v_website, v_keyword, v_watch_url, v_download_url, v_upload_date, v_volume, v_comment_num, v_dianzan_num, v_collect_num, v_forward_num, v_watch_num, v_tanmu_num, v_net_num, v_title, v_cover_pic, v_stay_time, v_media_file_name, v_save_dir, v_video_not_found, v_tag_browse_num, v_tag_pic_num, v_tag_follow_num, v_tag_hot_num, v_tag_name, v_pic_name, v_tag_info, v_pic_info, v_activity, v_user_name, v_user_id; /*获取第一条记录*/ WHILE var <> 1 DO INSERT INTO loreal.social_scan_v2 ( sys_date, file_type, website, keyword, watch_url, download_url, upload_date, volume, comment_num, dianzan_num, collect_num, forward_num, watch_num, tanmu_num, net_num, title, cover_pic, stay_time, media_file_name, save_dir, video_not_found, tag_browse_num, tag_pic_num, tag_follow_num, tag_hot_num, tag_name, pic_name, tag_info, pic_info, activity, user_name, user_id ) VALUES ( v_sys_date, v_file_type, v_website, v_keyword, v_watch_url, v_download_url, v_upload_date, v_volume, v_comment_num, v_dianzan_num, v_collect_num, v_forward_num, v_watch_num, v_tanmu_num, v_net_num, v_title, v_cover_pic, v_stay_time, v_media_file_name, v_save_dir, v_video_not_found, v_tag_browse_num, v_tag_pic_num, v_tag_follow_num, v_tag_hot_num, v_tag_name, v_pic_name, v_tag_info, v_pic_info, v_activity, v_user_name, v_user_id ); FETCH cur INTO v_sys_date, v_file_type, v_website, v_keyword, v_watch_url, v_download_url, v_upload_date, v_volume, v_comment_num, v_dianzan_num, v_collect_num, v_forward_num, v_watch_num, v_tanmu_num, v_net_num, v_title, v_cover_pic, v_stay_time, v_media_file_name, v_save_dir, v_video_not_found, v_tag_browse_num, v_tag_pic_num, v_tag_follow_num, v_tag_hot_num, v_tag_name, v_pic_name, v_tag_info, v_pic_info, v_activity, v_user_name, v_user_id; /*取下一条记录*/ END WHILE; CLOSE cur; SET v_keyword_index = v_keyword_index + 1; SET var = 0; END WHILE; SET v_website_index = v_website_index + 1; SET var = 0; END WHILE; END -- // -- delimiter ;
2、视频job:p_etl_media_v2_job
-- p_etl_media_v2_job DROP EVENT IF EXISTS p_etl_media_v2_job; delimiter // CREATE DEFINER = `root`@`%` EVENT `p_etl_media_v2_job` ON SCHEDULE EVERY 1 DAY STARTS '2016-05-25 02:00:00' ON COMPLETION PRESERVE ENABLE DO BEGIN CALL media.p_etl_media_v2 ( cast( ( date_sub(curdate(), INTERVAL 1 DAY) ) AS SIGNED INTEGER ), cast(CURDATE() AS SIGNED INTEGER) ) ; END// delimiter ;
3、图片sp:p_etl_picture_v2
-- AUTHORS:cenliang -- CREATED_DATE:2016-05-24 -- /*********************** 图片网站 ********************************/ -- DROP PROCEDURE -- IF EXISTS p_etl_picture_v2; -- delimiter // -- -- -- CREATE DEFINER = `root`@`%` PROCEDURE `p_etl_picture_v2` ( -- IN v_dayid_first INT, -- IN v_dayid_end INT -- ) -- BEGIN -- 采集日期 DECLARE v_sys_date INT (10); -- 文件类型 DECLARE v_file_type VARCHAR (7); -- 网站名称 DECLARE v_website VARCHAR (50); -- 品牌关键词 DECLARE v_keyword VARCHAR (50); -- 视频播放url、(图片详情url,新需求) DECLARE v_watch_url VARCHAR (250) DEFAULT NULL; -- 视频下载url、图片下载url DECLARE v_download_url VARCHAR (250) DEFAULT NULL; -- 上传日期 DECLARE v_upload_date VARCHAR (50) DEFAULT NULL; -- 声量(综合排名) DECLARE v_volume INT (11) DEFAULT 0; -- 视频评论数、图片评论数 DECLARE v_comment_num INT (11) DEFAULT NULL; -- 视频点赞数、图片喜欢数 DECLARE v_dianzan_num INT (11) DEFAULT NULL; -- 视频收藏数、图片收藏数 DECLARE v_collect_num INT (11) DEFAULT NULL; -- 视频分享数、图片转发数(目前微博有!) DECLARE v_forward_num INT (11) DEFAULT NULL; -- 视频播放数 DECLARE v_watch_num INT (11) DEFAULT NULL; -- 视频弹幕数(目前B站有!) DECLARE v_tanmu_num INT (11) DEFAULT NULL; -- 视频网络指数(目前优酷有!) DECLARE v_net_num INT (11) DEFAULT NULL; -- 视频标题 DECLARE v_title VARCHAR (250) DEFAULT NULL; -- 视频封面 DECLARE v_cover_pic VARCHAR (250) DEFAULT NULL; -- 视频播放时长 DECLARE v_stay_time VARCHAR (50) DEFAULT NULL; -- 视频真实下载到本地的文件名 DECLARE v_media_file_name VARCHAR (250) DEFAULT NULL; -- 存储路径(不包括文件名,只是上级路径) DECLARE v_save_dir VARCHAR (250) DEFAULT NULL; -- 是否视频已经下架找不到 DECLARE v_video_not_found INT (11) DEFAULT 0; -- 标签浏览数 DECLARE v_tag_browse_num INT (11) DEFAULT NULL; -- 标签图片数 DECLARE v_tag_pic_num INT (11) DEFAULT NULL; -- 标签关注数 DECLARE v_tag_follow_num INT (11) DEFAULT NULL; -- 标签热度 DECLARE v_tag_hot_num INT (11) DEFAULT NULL; -- 标签名 DECLARE v_tag_name VARCHAR (50) DEFAULT NULL; -- 图片名 DECLARE v_pic_name VARCHAR (50) DEFAULT NULL; -- 标签详情 DECLARE v_tag_info VARCHAR (1000) DEFAULT NULL; -- 图片详情 DECLARE v_pic_info VARCHAR (250) DEFAULT NULL; -- 活动 DECLARE v_activity VARCHAR (250) DEFAULT NULL; -- 用户名 DECLARE v_user_name VARCHAR (250) DEFAULT NULL; -- 用户id DECLARE v_user_id VARCHAR (250) DEFAULT NULL; -- 是否达到游标读取记录末尾的,控制变量 DECLARE var INT DEFAULT 0; -- 网站循环的指标变量:1、2、3 DECLARE v_website_index INT; -- 网站变量:nice、in、weibo DECLARE v_website_str VARCHAR (255); -- 关键词循环的指标变量:1、2、3、4 DECLARE v_keyword_index INT; -- 关键词变量:欧莱雅、美宝莲、美即、NYX DECLARE v_keyword_str VARCHAR (255); DECLARE cur CURSOR FOR SELECT DISTINCT REPLACE (date_time, '-', '') AS sys_date, 'picture' AS file_type, source AS website, brand AS keyword, image_detail_url AS watch_url, pic_url AS download_url, ( CASE v_website_str WHEN 'in' THEN REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, '.info/in/', - 1 ), 10 ), '/', '' ) WHEN 'nice' THEN REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, 'upload/show/', - 1 ), 10 ), '/', '' ) WHEN 'weibo' THEN 1 / 0 ELSE 'none' END ) AS upload_date, ROUND( IFNULL( ( POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(pic_like_num, 0), 2) + POWER(IFNULL(pic_collection, 0), 2) + POWER(IFNULL(pic_forward, 0), 2) ) / ( IFNULL(pic_comments_num, 0) + IFNULL(pic_like_num, 0) + IFNULL(pic_collection, 0) + IFNULL(pic_forward, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(pic_like_num, 0) AS dianzan_num, 1 / 0 AS collect_num, ( CASE v_website_str WHEN 'in' THEN 1 / 0 WHEN 'nice' THEN 1 / 0 WHEN 'weibo' THEN pic_forward ELSE 1 / 0 END ) AS forward_num, 1 / 0 AS watch_num, 1 / 0 AS tanmu_num, 1 / 0 AS net_num, 1 / 0 AS title, 1 / 0 AS cover_pic, 1 / 0 AS stay_time, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found, IFNULL(browse_num, 0) AS tag_browse_num, IFNULL(tag_pic_num, 0) AS tag_pic_num, IFNULL(tag_follower, 0) AS tag_follow_num, IFNULL(temprature, 0) AS tag_hot_num, tag_name AS tag_name, pic_name AS pic_name, tag_info AS tag_info, pic_info AS pic_info, activity AS activity, user_name AS user_name, user_id AS user_id FROM picture.nice WHERE source = v_website_str AND brand = v_keyword_str AND REPLACE (date_time, '-', '') >= v_dayid_first AND REPLACE (date_time, '-', '') < v_dayid_end GROUP BY REPLACE (date_time, '-', ''), source, brand, pic_url ORDER BY volume DESC LIMIT 50; /*是否达到记录的末尾控制变量*/ DECLARE CONTINUE HANDLER FOR NOT FOUND SET var = 1; SET v_website_index = 1; SET v_website_str = ''; WHILE v_website_index <= 3 DO SELECT CASE v_website_index WHEN 1 THEN 'nice' WHEN 2 THEN 'in' WHEN 3 THEN 'weibo' ELSE 'none' END INTO v_website_str; SET v_keyword_index = 1; SET v_keyword_str = ''; WHILE v_keyword_index <= 4 DO SELECT CASE v_keyword_index WHEN 1 THEN '欧莱雅' WHEN 2 THEN '美宝莲' WHEN 3 THEN '美即' WHEN 4 THEN 'NYX' ELSE 'none' END INTO v_keyword_str; OPEN cur; FETCH cur INTO v_sys_date, v_file_type, v_website, v_keyword, v_watch_url, v_download_url, v_upload_date, v_volume, v_comment_num, v_dianzan_num, v_collect_num, v_forward_num, v_watch_num, v_tanmu_num, v_net_num, v_title, v_cover_pic, v_stay_time, v_media_file_name, v_save_dir, v_video_not_found, v_tag_browse_num, v_tag_pic_num, v_tag_follow_num, v_tag_hot_num, v_tag_name, v_pic_name, v_tag_info, v_pic_info, v_activity, v_user_name, v_user_id; /*获取第一条记录*/ WHILE var <> 1 DO INSERT INTO loreal.social_scan_v2 ( sys_date, file_type, website, keyword, watch_url, download_url, upload_date, volume, comment_num, dianzan_num, collect_num, forward_num, watch_num, tanmu_num, net_num, title, cover_pic, stay_time, media_file_name, save_dir, video_not_found, tag_browse_num, tag_pic_num, tag_follow_num, tag_hot_num, tag_name, pic_name, tag_info, pic_info, activity, user_name, user_id ) VALUES ( v_sys_date, v_file_type, v_website, v_keyword, v_watch_url, v_download_url, v_upload_date, v_volume, v_comment_num, v_dianzan_num, v_collect_num, v_forward_num, v_watch_num, v_tanmu_num, v_net_num, v_title, v_cover_pic, v_stay_time, v_media_file_name, v_save_dir, v_video_not_found, v_tag_browse_num, v_tag_pic_num, v_tag_follow_num, v_tag_hot_num, v_tag_name, v_pic_name, v_tag_info, v_pic_info, v_activity, v_user_name, v_user_id ); FETCH cur INTO v_sys_date, v_file_type, v_website, v_keyword, v_watch_url, v_download_url, v_upload_date, v_volume, v_comment_num, v_dianzan_num, v_collect_num, v_forward_num, v_watch_num, v_tanmu_num, v_net_num, v_title, v_cover_pic, v_stay_time, v_media_file_name, v_save_dir, v_video_not_found, v_tag_browse_num, v_tag_pic_num, v_tag_follow_num, v_tag_hot_num, v_tag_name, v_pic_name, v_tag_info, v_pic_info, v_activity, v_user_name, v_user_id; /*取下一条记录*/ END WHILE; CLOSE cur; SET v_keyword_index = v_keyword_index + 1; SET var = 0; END WHILE; SET v_website_index = v_website_index + 1; SET var = 0; END WHILE; END -- // -- delimiter ;
4、图片job:p_etl_picture_v2_job
-- p_etl_picture_v2_job DROP EVENT IF EXISTS p_etl_picture_v2_job; delimiter // CREATE DEFINER = `root`@`%` EVENT `p_etl_picture_v2_job` ON SCHEDULE EVERY 1 DAY STARTS '2016-05-25 02:00:00' ON COMPLETION PRESERVE ENABLE DO BEGIN CALL picture.p_etl_picture_v2 ( cast( ( date_sub(curdate(), INTERVAL 1 DAY) ) AS SIGNED INTEGER ), cast(CURDATE() AS SIGNED INTEGER) ) ; END// delimiter ;