import pandas as pd
import numpy as np
# Load the zodiac feature table. The context manager closes the file handle
# as soon as pandas has finished parsing, instead of leaking it.
with open(r'ft_zodiac.txt', encoding='utf-8') as zodiac_file:
    ft_zodiac = pd.read_csv(zodiac_file)
print(ft_zodiac.shape)
ft_zodiac.head()
(23519, 4)
|
0 |
order_id |
chinese_zodiac |
zodiac |
0 |
0 |
100000081567592448 |
狗 |
处女座 |
1 |
0 |
100000467565182976 |
牛 |
双子座 |
2 |
0 |
100000530945323008 |
羊 |
射手座 |
3 |
0 |
100000556765458432 |
鼠 |
摩羯座 |
4 |
0 |
100000598171623424 |
虎 |
水瓶座 |
以 pd15(逾期 15 天)作为好坏的分割节点:>15 天为坏人,<15 天为好人?
逾期 15 天以上的人为坏,逾期 5 天以内的人为好。
# Load the label table. The context manager closes the file handle once
# pandas has parsed it. No explicit encoding is passed, so the platform
# default is used, exactly as before.
with open(r'zodiac_label.txt') as label_file:
    zodiac_label = pd.read_csv(label_file)
zodiac_label.head()
|
order_id |
overdue_days |
repay_time |
label |
0 |
100000081567592448 |
0 |
2018-07-09 |
0 |
1 |
100000467565182976 |
1 |
2018-07-09 |
0 |
2 |
100000530945323008 |
0 |
2018-07-09 |
0 |
3 |
100000556765458432 |
0 |
2018-07-09 |
0 |
4 |
100000598171623424 |
0 |
2018-07-09 |
0 |
# Show the distinct values present in the label column.
set(zodiac_label['label'])
{0, 1, 2}
# Keep only the rows whose label is not 2 (i.e. drop the label == 2 rows).
ft_label = zodiac_label.loc[zodiac_label['label'].ne(2)]
ft_label.head()
|
order_id |
overdue_days |
repay_time |
label |
0 |
100000081567592448 |
0 |
2018-07-09 |
0 |
1 |
100000467565182976 |
1 |
2018-07-09 |
0 |
2 |
100000530945323008 |
0 |
2018-07-09 |
0 |
3 |
100000556765458432 |
0 |
2018-07-09 |
0 |
4 |
100000598171623424 |
0 |
2018-07-09 |
0 |
# Show the distinct label values that remain after filtering.
set(ft_label['label'])
{0, 1}
# Inner-join the filtered labels with the zodiac features on order_id, so only
# orders present in both tables are kept.
data = ft_label.merge(ft_zodiac, on='order_id', how='inner')
data.head()
|
order_id |
overdue_days |
repay_time |
label |
0 |
chinese_zodiac |
zodiac |
0 |
100000081567592448 |
0 |
2018-07-09 |
0 |
0 |
狗 |
处女座 |
1 |
100000467565182976 |
1 |
2018-07-09 |
0 |
0 |
牛 |
双子座 |
2 |
100000530945323008 |
0 |
2018-07-09 |
0 |
0 |
羊 |
射手座 |
3 |
100000556765458432 |
0 |
2018-07-09 |
0 |
0 |
鼠 |
摩羯座 |
4 |
100000598171623424 |
0 |
2018-07-09 |
0 |
0 |
虎 |
水瓶座 |
badrate = bad / total
# Distinct values of the zodiac column in the merged data.
zodiac_list = set(data['zodiac'])
zodiac_list
{'双子座',
'双鱼座',
'处女座',
'天秤座',
'天蝎座',
'射手座',
'巨蟹座',
'摩羯座',
'水瓶座',
'狮子座',
'白羊座',
'金牛座'}
# Distinct values of the chinese_zodiac column in the merged data.
chinese_zodiac_list = set(data['chinese_zodiac'])
chinese_zodiac_list
{'兔', '牛', '狗', '猪', '猴', '羊', '虎', '蛇', '马', '鸡', '鼠', '龙'}
# Zodiac sign: bad rate per sign, computed as bad / (bad + good), where
# label == 1 counts as bad and label == 0 counts as good.
zodiac_badrate = {}
for sign in zodiac_list:
    labels = data.loc[data['zodiac'] == sign, 'label']
    bad = int((labels == 1).sum())   # number of bad orders
    good = int((labels == 0).sum())  # number of good orders
    total = bad + good
    # A sign with no labelled rows has no defined rate: store NaN instead of
    # dividing zero by zero.
    zodiac_badrate[sign] = bad / total if total else float('nan')
zodiac_badrate
{'双子座': 0.1312410841654779,
'巨蟹座': 0.1408351026185421,
'狮子座': 0.12760416666666666,
'射手座': 0.14480286738351256,
'水瓶座': 0.140117994100295,
'白羊座': 0.13455414012738853,
'双鱼座': 0.14873646209386282,
'处女座': 0.13035143769968052,
'天秤座': 0.12461252324860508,
'天蝎座': 0.12005028284098052,
'摩羯座': 0.12920489296636087,
'金牛座': 0.12259059367771781}
# Rank the (sign, bad rate) pairs from highest to lowest bad rate and turn the
# ranking into a two-column table.
f = sorted(zodiac_badrate.items(), key=lambda pair: pair[1], reverse=True)
zodiac_badrate = pd.DataFrame(f)
zodiac_badrate.columns = ['星座', 'badrate']
zodiac_badrate
|
星座 |
badrate |
0 |
双鱼座 |
0.148736 |
1 |
射手座 |
0.144803 |
2 |
巨蟹座 |
0.140835 |
3 |
水瓶座 |
0.140118 |
4 |
白羊座 |
0.134554 |
5 |
双子座 |
0.131241 |
6 |
处女座 |
0.130351 |
7 |
摩羯座 |
0.129205 |
8 |
狮子座 |
0.127604 |
9 |
天秤座 |
0.124613 |
10 |
金牛座 |
0.122591 |
11 |
天蝎座 |
0.120050 |
# Feed the '星座' and 'badrate' columns of the ranked table to a pyecharts Line.
# NOTE(review): `from pyecharts import Line` looks like the legacy pyecharts
# 0.x import path - confirm against the installed version.
from pyecharts import Line
x = zodiac_badrate['星座']
y = zodiac_badrate['badrate']
line = Line('星座')  # presumably the chart title - TODO confirm
line.add(1, x, y)  # presumably 1 is the series name - TODO confirm
<div id="c56416b4b8514d2780bb35f9e761fcf5" style="800px;height:400px;"></div>
# Chinese zodiac: bad rate per animal sign, computed as bad / (bad + good),
# where label == 1 counts as bad and label == 0 counts as good.
chinese_zodiac_badrate = {}
for animal in chinese_zodiac_list:
    labels = data.loc[data['chinese_zodiac'] == animal, 'label']
    bad = int((labels == 1).sum())   # number of bad orders
    good = int((labels == 0).sum())  # number of good orders
    total = bad + good
    # An animal sign with no labelled rows has no defined rate: store NaN
    # instead of dividing zero by zero.
    chinese_zodiac_badrate[animal] = bad / total if total else float('nan')
chinese_zodiac_badrate
{'猪': 0.14269406392694065,
'牛': 0.1578112609040444,
'虎': 0.15165876777251186,
'龙': 0.1439084219133279,
'鼠': 0.1340602950609365,
'兔': 0.1502843216896832,
'鸡': 0.12846998063266624,
'蛇': 0.12789827973074047,
'羊': 0.11335403726708075,
'猴': 0.12008141112618724,
'马': 0.12053872053872054,
'狗': 0.11052009456264776}
# Rank the (animal sign, bad rate) pairs from highest to lowest bad rate and
# turn the ranking into a two-column table.
f = sorted(chinese_zodiac_badrate.items(), key=lambda pair: pair[1], reverse=True)
chinese_zodiac_badrate = pd.DataFrame(f)
chinese_zodiac_badrate.columns = ['生肖', 'badrate']
chinese_zodiac_badrate
|
生肖 |
badrate |
0 |
牛 |
0.157811 |
1 |
虎 |
0.151659 |
2 |
兔 |
0.150284 |
3 |
龙 |
0.143908 |
4 |
猪 |
0.142694 |
5 |
鼠 |
0.134060 |
6 |
鸡 |
0.128470 |
7 |
蛇 |
0.127898 |
8 |
马 |
0.120539 |
9 |
猴 |
0.120081 |
10 |
羊 |
0.113354 |
11 |
狗 |
0.110520 |
# Feed the '生肖' and 'badrate' columns of the ranked table to a pyecharts Line.
# NOTE(review): `from pyecharts import Line` looks like the legacy pyecharts
# 0.x import path - confirm against the installed version.
from pyecharts import Line
x = chinese_zodiac_badrate['生肖']
y = chinese_zodiac_badrate['badrate']
line = Line('生肖')  # presumably the chart title - TODO confirm
line.add(1,x,y)  # presumably 1 is the series name - TODO confirm
<div id="8801efc233e94477a9d56e1162e60a2b" style="800px;height:400px;"></div>