• [Python] 练习代码


    # from random import randrange
    # num = int(input('摇几次骰子: '))
    # sides=int(input('筛子有几个面: '))
    # sum=0
    # for i in range(num):
    #     sum+= randrange(sides)+1
    # print('最终的点数和是 ',sum,'平均点数是:',sum/num)
    
    
    
    # from random import shuffle
    # from pprint import pprint
    # values=list(range(1,11))+'Jack Queen King'.split()  #并入列表中
    # card_suits='diamonds clubs hearts spades'.split()
    # value_suit=['{} of {}'.format(v,c) for v in values for c in card_suits]
    # shuffle(value_suit)  #打乱顺序
    # pprint(value_suit[:12])
    # while value_suit:
    #     input(value_suit.pop())
    
    
    
    # Append one line of text. The context manager closes the handle even if
    # the write raises.
    with open('a123.txt','a') as f:
        f.write('hello aaaaaaaaaaaaadddddddddddddddddd')

    # Echo at most ten lines. readline() returns '' once the file is exhausted,
    # so short files simply print nothing for the remaining iterations.
    with open('a123.txt','r') as f:
        for i in range(10):
            print(f.readline(),end='')

    # A single-quoted literal cannot span physical lines, so the line breaks
    # are written as \n escapes.
    with open('a123.txt','a') as f:
        f.write('This\nis no\nhaikou')
    
    
    
    def process(string):
        """Print *string* after a "processing" marker, separated by one space."""
        message = '处理中...' + ' ' + str(string)
        print(message)
        
    
    # with open('a123.txt','r') as f:
    #     while True:
    #         line=f.readline()
    #         if not line:
    #             break
    #         process(line)
    # First pass: pull one line at a time until readline() signals end of file
    # by returning the empty string.
    with open('a123.txt','r') as f:
        for line in iter(f.readline, ''):
            process(line)

    # Second pass: materialise every line into a list first, then walk it.
    with open('a123.txt','r') as f:
        for line in list(f):
            process(line)
            
            
            
    
    
    
    def triangles():
        """Yield the rows of Pascal's triangle forever, starting with [1].

        Each row is a fresh list, so rows already handed out are never
        modified afterwards.
        """
        current = [1]
        while True:
            yield current
            # Every inner entry is the sum of the two entries above it.
            inner = [left + right for left, right in zip(current, current[1:])]
            current = [1] + inner + [1]
    # Collect (and echo) the first ten rows, then compare them with the known
    # answer.
    n = 0
    results = []
    for n, t in enumerate(triangles(), start=1):
        print(t)
        results.append(t)
        if n == 10:
            break
    expected = [
        [1],
        [1, 1],
        [1, 2, 1],
        [1, 3, 3, 1],
        [1, 4, 6, 4, 1],
        [1, 5, 10, 10, 5, 1],
        [1, 6, 15, 20, 15, 6, 1],
        [1, 7, 21, 35, 35, 21, 7, 1],
        [1, 8, 28, 56, 70, 56, 28, 8, 1],
        [1, 9, 36, 84, 126, 126, 84, 36, 9, 1]
    ]
    print('测试通过!' if results == expected else '测试失败!')
        
        
    ' a test module '
    
    __author__ = 'Michael Liao'
    
    import sys
    
    def test():
        """Greet based on the command line.

        Prints a generic greeting when only the program name is present, a
        personal greeting when exactly one extra argument is given, and a
        complaint for any other argument count.
        """
        argv = sys.argv
        argc = len(argv)
        if argc == 1:
            message = 'Hello, world!'
        elif argc == 2:
            message = 'Hello, %s!' % argv[1]
        else:
            message = 'Too many arguments!'
        print(message)
    
    if __name__=='__main__':
        test()
    
    class Student(object):
        """Empty placeholder class; attributes are attached to instances ad hoc."""
    bart = Student()
    # Instances accept new attributes at any time.
    setattr(bart, 'name', 'jojo')
    getattr(bart, 'name')
    
    
    
    class Student(object):
        """A student record holding a name and a numeric score."""

        def __init__(self, name, score):
            self.name, self.score = name, score

        def get_grade(self):
            """Return 'A' for a score of 90 or more, 'B' for 60 or more, else 'C'."""
            # Thresholds are checked from the highest down; the first match wins.
            for floor, letter in ((90, 'A'), (60, 'B')):
                if self.score >= floor:
                    return letter
            return 'C'
    
    gg=Student('aaa',100)
    gg.get_grade()
    
    
    # Print every character of "python" except 't', each followed by a space.
    for c in "python":
        if c != 't':
            print(c,end=' ')


    # Print the string, then keep dropping its last character and printing
    # again until nothing is left.
    s='python'
    while s:
        for c in s:
            print(c,end='')
        s=s[:-1]
    
    import random
    from pprint import pprint
    # random.seed() returns None, so printing its result only ever showed
    # "None". Seed first, then show the first draw, which is reproducible for
    # a fixed seed.
    random.seed(10)
    pprint(random.random())
    
    
    from random import random
    from time import perf_counter
    # Monte Carlo estimate of pi: throw random points at the unit square and
    # count how many land inside the quarter circle of radius 1.
    # Total number of random points ("darts") to throw.
    DARTS=1000*10000
    # Number of darts that landed within distance 1 of the origin.
    hits=0.0
    start=perf_counter()
    for i in range(1,DARTS+1):
        # Two independent draws give one point (x, y).
        x,y=random(),random()
        # Euclidean distance of the point from the origin.
        dist=pow(x**2+y**2,0.5)
        if dist <= 1:
            hits=hits+1
    # hits/DARTS approximates the quarter-circle area pi/4, hence the factor 4.
    pi = 4*(hits/DARTS)
    print("圆周率值是:{}".format(pi))
    # Elapsed wall-clock time since `start` was taken.
    print('运行时间是:{:.20f}s'.format(perf_counter()-start))
    
    import requests
    # requests applies no timeout by default, so an unresponsive server would
    # block this call forever; fail after ten seconds instead.
    r=requests.get('http://www.shipxy.com/', timeout=10)
    r.status_code
    r.text
    
    
    
    # Print every ordered triple of pairwise-different digits taken from 1..4.
    digits = range(1, 5)
    for i in digits:
        for j in digits:
            if j == i:
                continue
            for k in digits:
                if k != i and k != j:
                    print(i,j,k)
                    
                    
    # Tiered commission: each profit band (in units of 10,000 yuan) has its own
    # rate, and the bands below it contribute their full amount. Bands are
    # tested from the highest down so that exactly one branch fires; negative
    # input prints nothing.
    profit = int(input('输入发放的利润值(万元): '))
    if profit >= 100:
        print('提成为:',(profit-100)*0.01+40*0.015+20*0.03+20*0.05+10*0.075+10*0.1,'万元')
    elif profit >= 60:
        print('提成为:',(profit-60)*0.015+20*0.03+20*0.05+10*0.075+10*0.1,'万元')
    elif profit >= 40:
        print('提成为:',(profit-40)*0.03+20*0.05+10*0.075+10*0.1,'万元')
    elif profit >= 20:
        print('提成为:',(profit-20)*0.05+10*0.075+10*0.1,'万元')
    elif profit >= 10:
        print('提成为:',(profit-10)*0.075+10*0.1,'万元')
    elif profit >= 0:
        print('提成为:',profit*0.1,'万元')
        
    # Same tiered commission, driven by two parallel tables: band floors (from
    # the highest down) and the rate that applies above each floor.
    profit = int(input('输入企业的利润值(万元): '))
    gap = [100,60,40,20,10,0]
    ratio =[0.01,0.015,0.03,0.05,0.075,0.1]
    bonus=0
    for idx, floor in enumerate(gap):
        if profit < floor:
            continue
        # Charge the part above this floor, then clamp the profit to the floor
        # so the lower bands only see what is left.
        bonus += (profit-floor)*ratio[idx]
        profit=floor
    print('提成为:',bonus,'万元')
    
    
    profit = int(input('输入企业的利润值(万元): '))
    def get_bonus(profit):
        """Return the tiered commission for *profit* (both in units of 10,000 yuan).

        Profit up to 10 earns 10%. Above that, the part exceeding a band floor
        earns that band's rate, and the commission of the floor itself is
        added recursively. A negative profit prints a warning and yields 0.
        """
        if 0 <= profit <= 10:
            return 0.1*profit
        # (band floor, rate for the part above the floor), highest band first.
        bands = ((100, 0.01), (60, 0.015), (40, 0.03), (20, 0.05), (10, 0.075))
        for floor, rate in bands:
            if profit > floor:
                return (profit-floor)*rate + get_bonus(floor)
        print("利润输入值不能为负")
        return 0
    
    if __name__ == '__main__':
        print('提成为:',get_bonus(profit),'万元')
    
    
    
    
    
    '''
    分析:
    x + 100 = m^2
    x + 100 + 168 = n^2
    n^2 - m^2 = 168
    (n + m) * (n - m) = 168
    n > m >= 0
    n - m 最小值为 1
    n + m 最大为 168
    n 最大值为 168
    m 最大值为 167
    '''
    
    def _test():
        """Print each integer x where x + 100 and x + 268 are both perfect squares.

        Searches every pair 0 <= m < n <= 168 with (n + m) * (n - m) == 168
        and prints the number twice (once derived from n, once from m),
        followed by the pair itself.
        """
        for m in range(168):
            for n in range(m + 1, 169):
                if (n + m) * (n - m) != 168:
                    continue
                print("该数为:{}".format(n * n - 168 - 100))
                print("该数为:{}".format(m * m - 100))
                print('n为{},m为{}'.format(n, m))
    if __name__ == '__main__':
        _test()
        
    def test1():
        """Print each integer x where x + 100 and x + 268 are both perfect squares.

        For every pair 0 <= n <= m <= 168 whose squares differ by 168, the
        number is n*n - 100.
        """
        for n in range(168):
            for m in range(n, 169):
                if (m + n) * (m - n) != 168:
                    continue
                print("这个整数是: ", n * n - 100)
    if __name__ =='__main__':
        test1()
    
    import pandas as pd
    # The path separators had been lost (and "\t" had turned into a literal
    # tab). NOTE(review): path reconstructed from the remaining fragments -
    # confirm the real location of train.csv.
    df = pd.read_csv(r'c:\Users\clemente\Desktop\all\train.csv',index_col='Id')
    df.head()
    
    
    from itertools import product

    # Enumerate 5-tuples of digits 0..6 that sum to 15 and satisfy the
    # inequality constraints below. The original tested the constraints with
    # `while`; none of the variables change inside that loop, so the first
    # matching tuple spun forever. A plain `if` is what was meant.
    # NOTE(review): only five of the ten possible pairs are compared, so some
    # digits may still repeat - confirm whether all-distinct was intended.
    matches = []
    for i, j, k, g, h in product(range(7), repeat=5):
        if (i!=j) and (i!=g) and (g!=h) and (h!=k) and (k!=i):
            if (i+j+k+g+h)==15:
                print (i,j,k,g,h)
                matches.append((i, j, k, g, h))
                    
    
    
    import random
    def gen5num():
        """Draw five digits without replacement and return them if they sum to 15.

        The first digit comes from randint(0, 6); the other four are chosen
        from what is left of the pool. The pool lists 0 twice, so 0 can be
        drawn twice. Returns a 5-tuple in draw order when the digits sum to
        15, otherwise None.
        """
        pool = [0, 1, 2, 3, 4, 5, 6, 0]
        picks = [random.randint(0, 6)]  # randint includes both end points
        pool.remove(picks[0])
        for _ in range(4):
            digit = random.choice(pool)
            pool.remove(digit)
            picks.append(digit)
        if sum(picks) == 15:
            return tuple(picks)
    if __name__=='__main__':
        for i in range(100):
            print(gen5num())
    
    
    
    #!/usr/bin/env python3
    #coding=utf-8
    
    from itertools import permutations
    # Print every arrangement of 5 distinct digits out of 0..6 and count them
    # on the fly.
    t = 0
    for t, i in enumerate(permutations('0123456',5), start=1):
        print(''.join(i))

    print("不重复的数量有:%s"%t)
    
    
    def sum_1():
        """
        Print the running total of the digits 0..7 and return the final sum.

        Returns:
            The sum of the digits in '01234567' (28).
        """
        # The accumulator has to exist before `+=`; without this line the
        # first iteration raised UnboundLocalError.
        p = 0
        for i in '01234567':
            p += int(i)
            # p is already the total so far; sum() on an int raises TypeError.
            print(p)
        return p
    sum_1()
    
    # np.*load*?    <- IPython wildcard query (lists names in np matching *load*); not valid Python syntax
    
    
    
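    # Problem: find three elements of an array whose sum equals a given integer.

    # Approach:
    # 1. Sort the array.
    # 2. For each i, combine array[i] with array[j] (j > i) and array[k] (k > j).
    # 3. If the sum is below the target keep scanning; if it equals the target, report it.
    # 4. If the sum exceeds the target, stop scanning k for this pair and move on.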
    
    def addData(array, sumdata):
        """
        Print every triple of elements of *array* whose sum equals *sumdata*.

        The search runs on a sorted copy, so the reported indices are
        positions in sorted order and the caller's list is left untouched.

        Args:
            array: sequence of numbers to search.
            sumdata: target sum.

        Returns:
            The number of matching triples found.
        """
        print ("sumdata: {}".format(sumdata))
        # sorted() returns a new list. The original discarded it and scanned
        # the unsorted input, which made the early `break` below incorrect.
        ordered = sorted(array)
        size = len(ordered)

        # Number of triples that hit the target.
        num = 0

        for i in range(size - 2):
            for j in range(i + 1, size - 1):
                for k in range(j + 1, size):
                    total = ordered[i] + ordered[j] + ordered[k]
                    if total < sumdata:
                        continue
                    if total > sumdata:
                        # Sorted data: any later k only makes the sum larger.
                        break
                    num += 1
                    print("Group {} :".format(num))
                    for idx in (i, j, k):
                        print("下标:{}, 元素值: {}".format(idx, ordered[idx]))
        return num
        
    if __name__=="__main__":
        test_array = [0,1,2,3,4,5,6,0]
        test_sumdata = 4
        addData(test_array, test_sumdata)
        
    
    
    #题目:数组中找出两个元素之和 等于给定的整数
    
    # 思路:
    # 1、将数组元素排序;
    # 2、array[i]与a[j](j的取值:i+1到len_array-1) 相加;
    # 3、如两两相加<整数继续,如=整数则输出元素值;
    # 4、如>则直接退出,i+1 开始下一轮相加比较
    
    import numpy as np
    names=np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
    data=np.random.randn(7,4)
    names
    data
    names == 'Bob'
    data[names=='Bob']
    
    
    # `arr` was never defined in this script. Build an 8x4 array whose row r
    # is filled with the value r, so the effect of fancy indexing (rows picked
    # in the order given) is easy to see.
    arr = np.empty((8, 4))
    for row in range(8):
        arr[row] = row
    arr[[4,3,0,6]]
    
    
    
    import matplotlib.pyplot as plt
    # 1000 evenly spaced sample points on [-5, 5), expanded to a 2-D grid.
    points = np.arange(-5,5,0.01)
    xs,ys=np.meshgrid(points,points)
    # Distance of every grid point from the origin.
    z=np.sqrt(xs**2+ys**2)

    plt.imshow(z,cmap=plt.cm.gray)
    plt.colorbar()
    # The mathtext command is \sqrt. The backslash had been lost, and a raw
    # string keeps it from being read as an escape sequence.
    plt.title(r"图像  $\sqrt{x^2+y^2}$")
    
    import pandas as pd
    obj=pd.Series(range(3),index=["a","b","c"])
    index=obj.index
    # pandas Index objects are immutable, so item assignment raises TypeError.
    # Catch it so the demonstration shows the message without aborting the
    # rest of the script.
    try:
        index[1]='d'
    except TypeError as err:
        print(err)
    import numpy as np
    import pandas as pd
    data=pd.DataFrame(np.arange(16).reshape(4,4),index=[1,2,3,4],columns=["one","two","three","forth"])
    data<3
    
    
    df1=pd.DataFrame({"A":[1,2]})
    df1
    
    obj=pd.Series(["a","a","b","c"]*4)
    obj
    obj.describe()
    
    
    import json 
    # json.loads() parses JSON *text*; the `obj` left over from above was a
    # pandas Series, which raises TypeError. Use a small JSON document.
    obj = '{"name": "Wes", "pet": null, "siblings": [{"name": "Scott", "age": 30}]}'
    result = json.loads(obj)
    result
    
    
    
    import pandas as pd
    ages=[12,34,23,45,67,30,20,55,98,30,43]
    # Bin edges; pd.cut builds right-closed intervals (1, 20], (20, 30], ...
    bins=[1,20,30,40,50,100]
    cats=pd.cut(ages,bins)
    cats
    # Integer bin number for each age.
    cats.codes
    # The top-level pd.value_counts() is deprecated; use the method on the
    # categorical itself.
    cats.value_counts()
    
    
    
    DataF=pd.DataFrame(np.arange(5*4).reshape((5,4)))
    DataF
    sample_1=np.random.permutation(5*4)
    sample_1.reshape(5,4)
    
    
    df=pd.DataFrame({'key':['b','b','a','c','a','b'],'data1':range(6)})
    df
    df[["data1"]]
    
    
    import pandas as pd
    left=pd.DataFrame({'key1':['foo','foo','bar'],'key2':['one','two','one'],'lval':[1,2,3]})
    right=pd.DataFrame({'key1':['foo','foo','bar','bar'],'key2':['one','one','one','two'],'rval':[4,5,6,7]})
    pd.merge(left,right,on=['key1'])
    
    
    
    import matplotlib.pyplot as plt
    import numpy as np
    data=np.arange(10000)
    plt.plot(data)
    
    fig=plt.figure()
    ax1=fig.add_subplot(2,2,1)
    ax2=fig.add_subplot(2,2,2)
    ax3=fig.add_subplot(2,2,3)
    
    ax1.hist(np.random.randn(100),bins=20,color='k',alpha=0.5)
    ax2.scatter(np.arange(30),np.arange(30)+3*np.random.randn(30))
    ax3.plot(np.random.randn(50).cumsum(),drawstyle='steps-post')
    
    
    
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    rect=plt.Rectangle((0.5,0.8),0.4,0.4,color='g',alpha=0.4)
    ax.add_patch(rect)
    
    plt.savefig("真的.svg",bbox_inches='tight')
    
    
    s=pd.Series(np.random.randn(10).cumsum())
    s.plot()
    
    s=pd.Series(np.random.randn(10).cumsum(),index=np.arange(0,100,10))
    s.plot()
    
    
    
    df=pd.DataFrame(np.random.randn(10,4).cumsum(0),columns=['A','B','C','D'],index=np.arange(0,100,10))
    df.plot()
    
    fig,axes=plt.subplots(2,1)
    data=pd.Series(np.random.rand(16),index=list("abcdefghijklmnop"))
    data.plot.bar(ax=axes[0],color='k',alpha=0.7)
    data.plot.barh(ax=axes[1],color='g',alpha=0.7)
    plt.show()
    
    
    df=pd.DataFrame(np.random.rand(6,4),index=['one','two','three','four','five','six'],columns=pd.Index(['A','B','C','D'],name='Genus'))
    df
    df.plot.bar()
    df.plot.barh(stacked=True,alpha=0.5)
    
    
    tips=pd.read_csv('tips.csv')
    party_counts = pd.crosstab(tips['day'],tips['size'])
    party_counts
    party_counts=party_counts.loc[:,2:5]
    party_counts
    
    party_counts.sum(1)
    
    party_pcts= party_counts.div(party_counts.sum(1),axis=0)
    party_pcts.plot.bar()
    
    
    
    import seaborn as sns
    tips=pd.read_csv('tips.csv')
    tips['tip_pct']=tips['tip']/(tips['total_bill']-tips['tip'])
    tips.head()
    sns.barplot(x='tip_pct',y='day',data=tips,orient='h')
    sns.barplot(x='tip_pct',y='day',hue='time',data=tips,orient='h')
    sns.set(style='whitegrid')
    
    
    tips['tip_pct'].plot.hist(bins=50)
    tips['total_bill'].plot.hist(bins=50)
    
    
    tips['tip_pct'].plot.density()
    tips['total_bill'].plot.density()
    
    
    comp1=np.random.normal(0,1,size=200)
    comp2=np.random.normal(10,2,size=200)
    values=pd.Series(np.concatenate([comp1,comp2]))
    sns.distplot(values,bins=101,color='k')
    
    
    macro=pd.read_csv('macrodata.csv')
    data=macro[['cpi','m1','tbilrate','unemp']]
    trans_data=np.log(data).diff().dropna()
    trans_data.head()
    trans_data[-5:]
    
    # Current seaborn takes x and y as keyword-only arguments.
    sns.regplot(x="m1",y="unemp",data=trans_data)
    plt.title('Changes in log {} versus log {}'.format('m1','unemp'))
    sns.set(style="ticks", color_codes=True)
    sns.pairplot(trans_data,diag_kind='kde',kind='reg')
    sns.pairplot(trans_data,diag_kind='hist',kind='reg')

    # factorplot() was renamed to catplot() and later removed; the arguments
    # used here carry over unchanged.
    sns.catplot(x='day',y='tip_pct',row='time',hue='smoker',kind='box',data=tips[tips.tip_pct<0.5])
    
    tips.describe()
    
    import matplotlib.pyplot as plt
    import pandas as pd
    import numpy as np
    df=pd.DataFrame({'key1':['a','a','b','b','a'],'key2':['one','two','one','two','one'],'data1':np.random.randn(5),'data2':np.random.randn(5)})
    df
    
    group_1=df['data1'].groupby(df['key1'])
    group_1.describe()
    group_2=df['data1'].groupby([df['key1'],df['key2']]).mean()
    group_2
    
    
    
    states=np.array(['Ohio','California','California','Ohio','Ohio'])
    years=np.array([2005,2005,2006,2005,2006])
    df['data1'].groupby([states,years]).mean()
    
    
    dict(list(df.groupby('key1')))
    
    
    # Read a calendar date and report which day of the year it is.
    try:
        year=input("输入年份:")
        month=input("输入月份: ")
        day=input("输入日期号: ")
    finally:
        print("正在计算")

    # Days elapsed before the first day of each month in a common year.
    months2days=[0,31,59,90,120,151,181,212,243,273,304,334]
    # Leap year (Gregorian rule): divisible by 4, except century years that
    # are not divisible by 400. Testing only `% 4` misclassified e.g. 1900.
    year_number=int(year)
    if (year_number % 4 == 0 and year_number % 100 != 0) or year_number % 400 == 0:
        # Every month from March on starts one day later.
        for i in range(2,12,1):
            months2days[i] +=1

    # Month numbers 1..12. The original appended i+1 (a stale index from the
    # loop above) instead of j+1, so every key came out identical.
    month_index=list(range(1,13))
    dict_md=dict(zip(month_index,months2days))
    whichday=dict_md[int(month)]+int(day)
    print('结果是: 第{}天'.format(whichday))
    
    
    
    
    
    def unsortedSearch(list, i, u):
        """Return True when some element of *list* lies strictly between *i* and *u*.

        All values are converted with int(), so numeric strings are accepted.

        Args:
            list: iterable of values to scan (the name is kept for existing
                callers even though it shadows the builtin).
            i: exclusive lower bound.
            u: exclusive upper bound.

        Raises:
            ValueError: if a bound or a scanned element is not an integer literal.
        """
        low, high = int(i), int(u)
        # The original checked the upper bound on one position and the lower
        # bound on a different one (a second counter), so two unrelated
        # elements could jointly "match". Both bounds must hold for the same
        # element.
        return any(low < int(item) < high for item in list)
    
    unsortedList = ['1', '3', '4', '2', '6', '9', '2', '1', '3', '7']
    num1 = '3'
    num2 = '5'
    
    isItThere = unsortedSearch(unsortedList, num1, num2)
    
    if isItThere:
        print ("There is a number between those values")
    else:
        print ("There isn't a number between those values")
    
    
    
    
    
    def get_nums():
        """Ask how many integers follow, then read that many and return them as a list.

        Each prompt shows the 1-based position of the value being requested
        and how many inputs are still outstanding, including the current one.
        """
        n = int(input("一共有几个整数?"))
        prompt = '请按次随机输入第{}个整数(剩余{}次输入):'
        return [int(input(prompt.format(k + 1, n - k))) for k in range(n)]
    if __name__=='__main__':
        list_nums=get_nums()
    
    def BubbleSort(nums):  # bubble sort
        """Sort *nums* in place in ascending order and return the same list.

        Prints the initial list and the state of the list after every outer
        pass, numbered from 1.
        """
        print('初始整数集合为:{}'.format(nums))
        size = len(nums)
        for done in range(size - 1):
            # After `done` passes the last `done` slots already hold their
            # final values, so the scan stops before them.
            for left in range(size - done - 1):
                right = left + 1
                if nums[left] > nums[right]:
                    nums[left], nums[right] = nums[right], nums[left]
            print("第{}次迭代排序结果:{}".format(done + 1, nums))
        return nums
    if __name__=='__main__':
        print('经过冒泡法排序最终得到:{}'.format(BubbleSort(list_nums)))
        
        
    def get_nums():
        """Read a count from standard input, then that many integers; return them as a list.

        The prompt for each value shows its 1-based position and the number
        of inputs still to come, counting the current one.
        """
        count = int(input("一共有几个整数?"))
        collected = []
        for position, remaining in zip(range(1, count + 1), range(count, 0, -1)):
            answer = input('请按次随机输入第{}个整数(剩余{}次输入):'.format(position, remaining))
            collected.append(int(answer))
        return collected
    if __name__=='__main__':
        myList=get_nums()
    
    def selectedSort(myList):
        """Sort *myList* in place in ascending order, printing it after each round.

        Round i settles slot i: every later element smaller than the current
        occupant is swapped into the slot immediately. Returns None.
        """
        size = len(myList)
        # One round per slot except the last, which is settled automatically.
        for i in range(size - 1):
            for j in range(i + 1, size):
                if myList[j] < myList[i]:
                    myList[i], myList[j] = myList[j], myList[i]
            # Report once per round (outer loop), not once per comparison.
            print("Round ",i,": ",myList)
    
    
    print("选择排序法:迭代过程 ")
    selectedSort(myList)
    
    
    
        
    
    def merge_sort(LIST):
        """Return a new ascending list with the elements of *LIST*.

        Despite the name this is a double-ended selection sort: each round
        moves the current minimum to the front part and the current maximum
        to the back part. The input is not modified.

        Args:
            LIST: iterable of mutually comparable items.

        Returns:
            A new sorted list.
        """
        # Work on a copy; the original removed items from the caller's list
        # and left it empty.
        remaining = list(LIST)
        start = []
        end = []
        while len(remaining) > 1:
            a = min(remaining)
            b = max(remaining)
            start.append(a)
            end.append(b)
            remaining.remove(a)
            remaining.remove(b)
        # An odd-length input leaves one middle element behind.
        if remaining:
            start.append(remaining[0])
        # Maxima were collected largest-first; flip them into ascending order.
        end.reverse()
        return (start + end)
    
    if __name__=='__main__':
        nums=[]
        n=int(input('一共几位数: '))
        for i in range(n):
            x=int(input("请依次输入整数:"))
            nums.append(x)
        print(merge_sort(nums))
        
        
    # =============================================================================
    #10.1.2
    # =============================================================================
    import pandas as pd
    df=pd.DataFrame({'key1':['a','a','b','b','a'],'key2':['one','two','one','two','one'],'data1':np.random.randn(5),'data2':np.random.randn(5)})
    df
    df.groupby(['key1','key2'])['data1'].mean()
    
    
    
    people=pd.DataFrame(np.random.randn(5,5),columns=['a','b','c','d','e'],index=['joe','steve','wes','jim','travis'])
    people
    # Column label -> group name; labels missing from the frame ('f') are ignored.
    mapping={'a':'red','b':'red','c':'blue','d':'blue','e':'red','f':'orange'}
    # groupby(..., axis=1) is deprecated in current pandas. Grouping the rows
    # of the transposed frame and transposing the result back gives the same
    # per-row group means.
    by_column=people.T.groupby(mapping)
    by_column.mean().T
    map_series=pd.Series(mapping)

    # A function used as the key is applied to each index label: group by name length.
    people.groupby(len).sum()
    
    
    # =============================================================================
    # 分组加权
    # =============================================================================
    
    import pandas as pd
    df=pd.DataFrame({'目录':['a','a','a','a','b','b','b','b'],'data':np.random.randn(8),'weights':np.random.randn(8)})
    df
    grouped=df.groupby('目录')
    get_weighpoint=lambda x: np.average(x['data'],weights=x['weights'])
    grouped.apply(get_weighpoint)
    
    
    # =============================================================================
    # 
    # =============================================================================
    
    spx=pd.read_csv('stock_px_2.csv',index_col=0,parse_dates=True)
    spx
    spx.info()
    
    
    
    
    from datetime import datetime
    
    datestrs=['7/6/2011','8/6/2011']
    [datetime.strptime(x,'%m/%d/%Y')for x in datestrs]
    
    
    dates=pd.date_range('1/1/2018',periods=1000)
    dates
    long_df=pd.DataFrame(np.random.randn(1000,4),index=dates,columns=['Colorado','Texas','New York','Ohio'])
    long_df
    
    pd.date_range('2018-10-1',periods=30,freq='1h')
    
    
    
    # =============================================================================
    # 
    # =============================================================================
    close_px_all=pd.read_csv("stock_px_2.csv",parse_dates=True,index_col=0)
    close_px=close_px_all[['AAPL','MSFT','XOM']]
    close_px=close_px.resample("B").ffill()
    close_px.AAPL.plot()
    close_px.AAPL.rolling(250).mean().plot()
    
    
    
    
    import pandas as pd
    import numpy as np
    values=pd.Series(['apple','orange','apple','apple']*2)
    values
    # Distinct values in order of first appearance.
    pd.unique(values)
    # The top-level pd.value_counts() is deprecated; call the Series method.
    values.value_counts()
    
    
    
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.linear_model import RANSACRegressor, LinearRegression, TheilSenRegressor
    from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
    from sklearn.svm import SVR
    from sklearn.linear_model import Ridge,Lasso,ElasticNet,BayesianRidge
    from sklearn.ensemble import GradientBoostingRegressor
    # sklearn.cross_validation was removed from scikit-learn; train_test_split
    # now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split
    
    data = pd.read_csv('../cement_data.csv')
    # 查看数据记录的长度,共1030行
    print(len(data))
    # 查看前五行数据
    data.head()
    
    
    
    import pandas
    titanic=pandas.read_csv('train.csv')
    titanic.head()
    titanic.describe()
    # Fill missing ages with the median age.
    titanic['Age']=titanic['Age'].fillna(titanic['Age'].median())
    print(titanic['Sex'].unique()) # list the category labels present in Sex
    # Encode the category labels as integers.
    titanic.loc[titanic['Sex']=='female','Sex']=1
    titanic.loc[titanic['Sex']=='male','Sex']=0
    # Fill missing ports of embarkation with 'S', then encode the labels.
    print(titanic['Embarked'].unique())
    titanic['Embarked']=titanic['Embarked'].fillna('S')
    # The column selector is required: without it .loc assigns the value to
    # EVERY column of the matching rows, wiping out those passengers' data.
    titanic.loc[titanic['Embarked']=='S','Embarked']=0
    titanic.loc[titanic['Embarked']=='C','Embarked']=1
    titanic.loc[titanic['Embarked']=='Q','Embarked']=2
    
    # =============================================================================
    # 引进模型,线性回归
    # =============================================================================
    from sklearn.linear_model import LinearRegression
    # sklearn.cross_validation was removed; KFold now lives in model_selection.
    from sklearn.model_selection import KFold
    # Three-fold cross-validation: each fold is held out once for prediction
    # while the model is fitted on the other two.

    predictors=['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked'] # feature columns fed to the model
    alg=LinearRegression()
    # Current API: the fold count is n_splits and the data is passed to
    # split(). Folds are consecutive (no shuffling), so the concatenated
    # predictions below line up with the rows of `titanic`.
    kf=KFold(n_splits=3)

    predictions=[]
    for train,test in kf.split(titanic):
        train_predictors=titanic[predictors].iloc[train,:]  # features of the training folds
        train_target=titanic['Survived'].iloc[train]  # labels of the training folds
        alg.fit(train_predictors,train_target)

        test_predictions=alg.predict(titanic[predictors].iloc[test,:])  # held-out fold
        predictions.append(test_predictions)

    import numpy as np
    predictions=np.concatenate(predictions,axis=0)
    # Map the regression output to a class label in one step. The original
    # first set values >= 0.5 to 0 and then set everything < 0.5 (including
    # those fresh zeros) to 1, so every prediction became 1 - and the labels
    # were inverted as well.
    predictions=(predictions>=0.5).astype(int)
    # Accuracy is the share of predictions equal to the true label. Summing
    # the matching predictions counted only the correctly predicted 1s.
    accuracy=(predictions==titanic['Survived'].values).mean()
    print(accuracy)
    
    
    # =============================================================================
    # 逻辑回归
    # =============================================================================
    # sklearn.cross_validation was removed; cross_val_score lives in
    # sklearn.model_selection.
    from sklearn import model_selection
    from sklearn.linear_model import LogisticRegression
    alg=LogisticRegression(random_state=1)
    # Mean accuracy over three cross-validation folds.
    scores=model_selection.cross_val_score(alg,titanic[predictors],titanic['Survived'],cv=3)
    print(scores.mean())
    
    # =============================================================================
    # 随机森林
    # =============================================================================
    # sklearn.cross_validation was removed; KFold and cross_val_score live in
    # sklearn.model_selection.
    from sklearn import model_selection
    from sklearn.ensemble import RandomForestClassifier
    predictors=['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']
    alg=RandomForestClassifier(random_state=1,n_estimators=10,min_samples_split=2,min_samples_leaf=1)
    # Current API: fold count via n_splits. Folds are unshuffled, as before,
    # so random_state is not passed here.
    kf=model_selection.KFold(n_splits=3)
    scores=model_selection.cross_val_score(alg,titanic[predictors],titanic['Survived'],cv=kf)
    print(scores.mean())
  • 相关阅读:
    service structure flowchart with full stack functionality in a brife map
    service structure flowchart [mobile to server via HTTP RESTful API and TCP/IP in a map]
    service structure flowchart [mobile to server via TCP/IP protocol]
    service structure flowchart [mobile to server via HTTP RESTful API]
    Python -- machine learning, neural network -- PyBrain 机器学习 神经网络
    Deploying OpenFire for IM (instant message) service (TCP/IP service) with database MySQL , client Spark on linux部署OpenFire IM 消息中间件服务
    做自己想做的事,成为想成为的人(转)
    PCB工艺要求
    看了三遍,沉默了五天
    850和940红外发射管的区别
  • 原文地址:https://www.cnblogs.com/clemente/p/9951189.html
Copyright © 2020-2023  润新知