• 无用之学matplotlib,numpy,pandas


    一、matplotlib学习

    matplotlib: 最流行的Python底层绘图库,主要做数据可视化图表,名字取材于MATLAB,模仿MATLAB构建

    例子1:

    # coding=utf-8
    # Example 1: line chart of a temperature reading taken every two hours.
    from matplotlib import pyplot as plt
    
    x = range(2,26,2)
    y = [15,13,14.5,17,20,25,26,26,27,22,18,15]
    
    # Set the figure size (in inches) and its resolution.
    plt.figure(figsize=(20,8),dpi=80)
    
    # Draw the line.
    plt.plot(x,y)
    
    # Set the axis ticks.
    # The x ticks are half-hour positions from 2.0 to 24.0, thinned to every
    # third one. They have to lie inside the plotted x range (2..24): ticks
    # placed outside it make matplotlib widen the axis to show them, which
    # squeezes the curve into one side of the figure.
    _xtick_labels = [i/2 for i in range(4,49)]
    plt.xticks(_xtick_labels[::3])
    plt.yticks(range(min(y),max(y)+1))
    
    # Save the figure (disabled).
    # plt.savefig("./t1.png")
    
    # Show the figure.
    plt.show()
    View Code

    图片如下

    例子2

    # coding=utf-8
    # Example 2: line chart with string tick labels and CJK text rendered
    # through an explicitly loaded font file.
    from matplotlib import pyplot as plt
    import random
    import matplotlib
    from matplotlib import font_manager
    
    # Font setup for Windows and Linux via matplotlib.rc (disabled).
    # font = {'family' : 'MicroSoft YaHei',
    #         'weight': 'bold',
    #         'size': 'larger'}
    # matplotlib.rc("font",**font)
    # matplotlib.rc("font",family='MicroSoft YaHei',weight="bold")
    
    # Another way to set the font: build FontProperties from a font file and
    # pass it to every call that draws text.
    my_font = font_manager.FontProperties(fname="/System/Library/Fonts/PingFang.ttc")
    
    # 120 x positions, one random integer in [20, 35] for each.
    x = range(0,120)
    y = [random.randint(20,35) for i in range(120)]
    
    plt.figure(figsize=(20,8),dpi=80)
    
    plt.plot(x,y)
    
    # Adjust the x-axis ticks: 60 labels for the first hour, 60 for the second.
    _xtick_labels = ["10点{}分".format(i) for i in range(60)]
    _xtick_labels += ["11点{}分".format(i) for i in range(60)]
    # Slice positions and labels with the same step so that each number stays
    # paired with its string; both sequences must have the same length.
    plt.xticks(list(x)[::3],_xtick_labels[::3],rotation=45,fontproperties=my_font) # rotation is the label angle in degrees
    
    # Add axis labels and a title.
    plt.xlabel("时间",fontproperties=my_font)
    plt.ylabel("温度 单位(℃)",fontproperties=my_font)
    plt.title("10点到12点每分钟的气温变化情况",fontproperties=my_font)
    
    plt.show()
    View Code

     例子3

    # coding=utf-8
    # Example 3: line chart with labelled x ticks and a background grid.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    
    # Raw string: a Windows path contains backslashes, and sequences such as
    # "\W", "\F" and "\S" are invalid escapes in a normal string literal
    # (Python warns about them). The resulting path text is unchanged.
    my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STHUPO.ttf")
    
    y = [1,0,1,1,2,4,3,2,3,4,4,5,6,5,4,3,3,1,1,1]
    x = range(11,31)
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    
    plt.plot(x,y)
    
    # Set the axis ticks: one labelled tick per x value.
    _xtick_labels = ["{}岁".format(i) for i in x]
    plt.xticks(x,_xtick_labels,fontproperties=my_font)
    plt.yticks(range(0,9))
    
    # Draw a faint grid (alpha is the line opacity).
    plt.grid(alpha=0.1)
    
    # Show the figure.
    plt.show()
    View Code

    例子4

    # coding=utf-8
    # Example 4: two line series on one figure, with a legend.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    
    # Raw string: a Windows path contains backslashes, and sequences such as
    # "\W", "\F" and "\S" are invalid escapes in a normal string literal
    # (Python warns about them). The resulting path text is unchanged.
    my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STHUPO.ttf")
    
    y_1 = [1,0,1,1,2,4,3,2,3,4,4,5,6,5,4,3,3,1,1,1]
    y_2 = [1,0,3,1,2,2,3,3,2,1 ,2,1,1,1,1,1,1,1,1,1]
    
    x = range(11,31)
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    
    # The label of each series is what the legend displays.
    plt.plot(x,y_1,label="自己",color="#F08080")
    plt.plot(x,y_2,label="同桌",color="#DB7093",linestyle="--")
    
    # Set the x-axis ticks: one labelled tick per x value.
    _xtick_labels = ["{}岁".format(i) for i in x]
    plt.xticks(x,_xtick_labels,fontproperties=my_font)
    # plt.yticks(range(0,9))
    
    # Draw a dotted grid.
    plt.grid(alpha=0.4,linestyle=':')
    
    # Add the legend. Note that legend() takes the font through `prop`,
    # whereas the tick and label calls take it through `fontproperties`.
    plt.legend(prop=my_font,loc="upper left")
    
    # Show the figure.
    plt.show()
    View Code

    例子5

    # coding=utf-8
    # Example 5: scatter plot comparing two months of daily values.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    
    # Raw string: a Windows path contains backslashes, and sequences such as
    # "\W", "\F" and "\S" are invalid escapes in a normal string literal
    # (Python warns about them). The resulting path text is unchanged.
    my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STHUPO.ttf")
    y_3 = [11,17,16,11,12,11,12,6,6,7,8,9,12,15,14,17,18,21,16,17,20,14,15,15,15,19,21,22,22,22,23]
    y_10 = [26,26,28,19,21,17,16,19,18,20,20,19,22,23,17,20,21,20,22,15,11,15,5,13,17,10,11,13,12,13,6]
    
    # The second series is shifted by 50 on the x axis so that it is drawn to
    # the right of the first one instead of on top of it.
    x_3 = range(1,32)
    x_10 = range(51,82)
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    
    # scatter() draws a scatter plot; this call is the only difference from
    # the earlier line-chart examples.
    plt.scatter(x_3,y_3,label="3月份")
    plt.scatter(x_10,y_10,label="10月份")
    
    # Adjust the x-axis ticks. The labels of the second series subtract the
    # 50 offset again so that they show the real day of the month.
    _x = list(x_3)+list(x_10)
    _xtick_labels = ["3月{}日".format(i) for i in x_3]
    _xtick_labels += ["10月{}日".format(i-50) for i in x_10]
    plt.xticks(_x[::3],_xtick_labels[::3],fontproperties=my_font,rotation=45)
    
    # Add the legend.
    plt.legend(loc="upper left",prop=my_font)
    
    # Add axis labels and a title.
    plt.xlabel("时间",fontproperties=my_font)
    plt.ylabel("温度",fontproperties=my_font)
    plt.title("标题",fontproperties=my_font)
    # Show the figure.
    plt.show()
    View Code

    例子6

    # coding=utf-8
    # Example 6: vertical bar chart with string category labels.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    # Raw string: a Windows path contains backslashes, and sequences such as
    # "\W", "\F" and "\S" are invalid escapes in a normal string literal
    # (Python warns about them). The resulting path text is unchanged.
    my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STHUPO.ttf")
    
    
    a = ["战狼2","速度与激情8","功夫瑜伽","西游伏妖篇","变形金刚5:最后的骑士","摔跤吧!爸爸","加勒比海盗5:死无对证","金刚:骷髅岛","极限特工:终极回归","生化危机6:终章","乘风破浪","神偷奶爸3","智取威虎山","大闹天竺","金刚狼3:殊死一战","蜘蛛侠:英雄归来","悟空传","银河护卫队2","情圣","新木乃伊",]
    
    b=[56.01,26.94,17.53,16.49,15.45,12.96,11.8,11.61,11.28,11.12,10.49,10.3,8.75,7.55,7.32,6.99,6.88,6.86,6.58,6.23]
    
    
    # Set the figure size.
    plt.figure(figsize=(20,15),dpi=80)
    # Draw the bars at integer positions 0..len(a)-1.
    plt.bar(range(len(a)),b,width=0.7)
    # Map those positions to the category strings on the x axis; the labels
    # are rotated 90 degrees.
    plt.xticks(range(len(a)),a,fontproperties=my_font,rotation=90)
    
    # Save before show().
    plt.savefig("./movie.png")
    
    plt.show()
    View Code

    例子6-2

    # Example 6-2: horizontal bar chart.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    # Raw string: a Windows path contains backslashes, and sequences such as
    # "\W", "\F" and "\S" are invalid escapes in a normal string literal
    # (Python warns about them). The resulting path text is unchanged.
    my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STHUPO.ttf")
    
    
    a = ["战狼2","速度与激情8","功夫瑜伽","西游伏妖篇","变形金刚5:最后的骑士","摔跤吧!爸爸","加勒比海盗5:死无对证","金刚:骷髅岛","极限特工:终极回归","生化危机6:终章","乘风破浪","神偷奶爸3","智取威虎山","大闹天竺","金刚狼3:殊死一战","蜘蛛侠:英雄归来","悟空传","银河护卫队2","情圣","新木乃伊",]
    
    b=[56.01,26.94,17.53,16.49,15.45,12.96,11.8,11.61,11.28,11.12,10.49,10.3,8.75,7.55,7.32,6.99,6.88,6.86,6.58,6.23]
    
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    # Draw horizontal bars; barh() takes the bar thickness as `height`
    # (the vertical bar() call in the previous example uses `width`).
    plt.barh(range(len(a)),b,height=0.3,color="orange")
    # Map the bar positions to the category strings on the y axis.
    plt.yticks(range(len(a)),a,fontproperties=my_font)
    
    plt.grid(alpha=0.3)
    # plt.savefig("./movie.png")
    
    plt.show()
    View Code

    例子7

    # coding=utf-8
    # Example 7: grouped bar chart, three series side by side per category.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    # Raw string: a Windows path contains backslashes, and sequences such as
    # "\W", "\F" and "\S" are invalid escapes in a normal string literal
    # (Python warns about them). The resulting path text is unchanged.
    my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STHUPO.ttf")
    
    
    a = ["猩球崛起3:终极之战","敦刻尔克","蜘蛛侠:英雄归来","战狼2"]
    b_16 = [15746,312,4497,319]
    b_15 = [12357,156,2045,168]
    b_14 = [2358,399,2358,362]
    
    bar_width = 0.2
    
    # Each series is shifted right by one more bar width, so the three bars
    # of a category sit next to each other.
    x_14 = list(range(len(a)))
    x_15 =  [i+bar_width for i in x_14]
    x_16 = [i+bar_width*2 for i in x_14]
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    
    # All three series use their precomputed position list.
    plt.bar(x_14,b_14,width=bar_width,label="9月14日")
    plt.bar(x_15,b_15,width=bar_width,label="9月15日")
    plt.bar(x_16,b_16,width=bar_width,label="9月16日")
    
    # Add the legend.
    plt.legend(prop=my_font)
    
    # Put the category label under the middle bar of each group.
    plt.xticks(x_15,a,fontproperties=my_font)
    
    plt.show()
    View Code

    例子8

    # coding=utf-8
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    
    a=[131,  98, 125, 131, 124, 139, 131, 117, 128, 108, 135, 138, 131, 102, 107, 114, 119, 128, 121, 142, 127, 130, 124, 101, 110, 116, 117, 110, 128, 128, 115,  99, 136, 126, 134,  95, 138, 117, 111,78, 132, 124, 113, 150, 110, 117,  86,  95, 144, 105, 126, 130,126, 130, 126, 116, 123, 106, 112, 138, 123,  86, 101,  99, 136,123, 117, 119, 105, 137, 123, 128, 125, 104, 109, 134, 125, 127,105, 120, 107, 129, 116, 108, 132, 103, 136, 118, 102, 120, 114,105, 115, 132, 145, 119, 121, 112, 139, 125, 138, 109, 132, 134,156, 106, 117, 127, 144, 139, 139, 119, 140,  83, 110, 102,123,107, 143, 115, 136, 118, 139, 123, 112, 118, 125, 109, 119, 133,112, 114, 122, 109, 106, 123, 116, 131, 127, 115, 118, 112, 135,115, 146, 137, 116, 103, 144,  83, 123, 111, 110, 111, 100, 154,136, 100, 118, 119, 133, 134, 106, 129, 126, 110, 111, 109, 141,120, 117, 106, 149, 122, 122, 110, 118, 127, 121, 114, 125, 126,114, 140, 103, 130, 141, 117, 106, 114, 121, 114, 133, 137,  92,121, 112, 146,  97, 137, 105,  98, 117, 112,  81,  97, 139, 113,134, 106, 144, 110, 137, 137, 111, 104, 117, 100, 111, 101, 110,105, 129, 137, 112, 120, 113, 133, 112,  83,  94, 146, 133, 101,131, 116, 111,  84, 137, 115, 122, 106, 144, 109, 123, 116, 111,111, 133, 150]
    
    # Compute the number of bins from the chosen bin width.
    d = 3  # bin width
    num_bins = (max(a)-min(a))//d
    print(max(a),min(a),max(a)-min(a))
    print(num_bins)
    
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    # density=True plots a normalized histogram instead of raw counts. It
    # replaces the old `normed` keyword, which current Matplotlib releases no
    # longer accept.
    plt.hist(a,num_bins,density=True)
    
    # One x tick every d units starting at the minimum. These coincide with
    # the bin edges when (max - min) is an exact multiple of d.
    plt.xticks(range(min(a),max(a)+d,d))
    
    plt.grid()
    
    plt.show()
    View Code

    例子9

    # coding=utf-8
    # Example 9: draw already-binned data as a histogram-like bar chart with
    # equal-width bars.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    
    # Left edge of each bin, the width of each bin, and the count in each bin.
    interval = [0,5,10,15,20,25,30,35,40,45,60,90]
    width = [5,5,5,5,5,5,5,5,5,15,30,60]
    quantity = [836,2737,3723,3926,3596,1438,3273,642,824,613,215,47]
    
    
    # Print the three lengths so they can be compared by eye.
    print(len(interval),len(width),len(quantity))
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    
    
    
    # Bars of width 1 at positions 0..11, so neighbouring bars touch.
    # `width` above is not used here: every bin is drawn equally wide.
    plt.bar(range(12),quantity,width=1)
    
    # Set the x-axis ticks. Positions are shifted by -0.5 so the 13 ticks fall
    # between the bars; the labels are the 12 left edges plus a final 150.
    _x = [i-0.5 for i in range(13)]
    _xtick_labels =  interval+[150]
    plt.xticks(_x,_xtick_labels)
    
    plt.grid(alpha=0.4)
    plt.show()
    View Code

     例子9-2

    # coding=utf-8
    # Example 9-2: draw already-binned data with bars whose widths match the
    # real bin widths.
    from matplotlib import pyplot as plt
    from matplotlib import font_manager
    
    # Left edge of each bin, the width of each bin, and the count in each bin.
    interval = [0,5,10,15,20,25,30,35,40,45,60,90]
    width = [5,5,5,5,5,5,5,5,5,15,30,60]
    quantity = [836,2737,3723,3926,3596,1438,3273,642,824,613,215,47]
    
    
    # Print the three lengths so they can be compared by eye.
    print(len(interval),len(width),len(quantity))
    
    # Set the figure size.
    plt.figure(figsize=(20,8),dpi=80)
    
    
    
    # align="edge" anchors each bar at its left edge, so bar i covers exactly
    # interval[i] .. interval[i] + width[i]. With the default centre alignment
    # the wide bars would overlap their neighbours.
    plt.bar(interval,quantity,width=width,align="edge")
    
    # Set the x-axis ticks on the bin boundaries: every left edge plus the
    # right edge of the last bin.
    _x = interval+[interval[-1]+width[-1]]
    
    
    plt.xticks(_x)
    
    plt.grid(alpha=0.4)
    plt.show()
    View Code

    二、常用问题总结

    应该选择哪种图形来呈现数据
    plt.plot(x,y)
    plt.bar(x,y)
    plt.scatter(x,y)
    plt.hist(data,bins,density)
    xticks和yticks的设置
    label和title,grid的设置
    绘图的大小和保存图片

    做法流程:

        明确问题 选择图形的呈现方式 准备数据 绘图和图形完善

    三、推荐网址

        1、matplotlib支持的图形是非常多的,如果有其他的需求,我们 可以查看一下url地址: http://matplotlib.org/gallery/index.html

         2、plotly:可视化工具中的github,相比于matplotlib更加简单,图形更加漂亮,同时兼容matplotlib和pandas。使用方法:简单,照着文档写即可。文档地址: https://plot.ly/python/

        3、echarts,前端框架,JS

        4、seaborn

    四、numpy

    一个在Python中做科学计算的基础库,重在数值计算,也是大部分PYTHON科学计算库的基础库,多用于在大型、多维数组上执行数值运算

    # coding=utf-8
    # numpy basics: creating arrays, inspecting and converting dtypes,
    # and rounding floats.
    import numpy as np
    import random
    
    # Create arrays with numpy; each result is printed together with its type.
    t1 = np.array([1,2,3,])
    print(t1)
    print(type(t1))
    
    t2 = np.array(range(10))
    print(t2)
    print(type(t2))
    
    # np.arange(start, stop, step) builds the array directly.
    t3 = np.arange(4,10,2)
    print(t3)
    print(type(t3))
    
    print(t3.dtype)
    print("*"*100)
    # Data types in numpy.
    
    # Request an explicit dtype via its string code.
    t4 = np.array(range(1,4),dtype="i1")
    print(t4)
    print(t4.dtype)
    
    # The bool type in numpy: the integers are converted to booleans.
    t5 = np.array([1,1,0,1,0,0],dtype=bool)
    print(t5)
    print(t5.dtype)
    
    # Convert to another data type with astype().
    t6 = t5.astype("int8")
    print(t6)
    print(t6.dtype)
    
    # Floating-point numbers in numpy.
    t7 = np.array([random.random() for i in range(10)])
    print(t7)
    print(t7.dtype)
    
    # Round to 2 decimal places.
    t8 = np.round(t7,2)
    print(t8)

     1、读取本地数据

    # coding=utf-8
    # Load a comma-separated file of integers with numpy and demonstrate
    # row, column and point indexing on the resulting array.
    import numpy as np
    
    us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
    uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"
    
    # t1 = np.loadtxt(us_file_path,delimiter=",",dtype="int",unpack=True)
    t2 = np.loadtxt(us_file_path,delimiter=",",dtype="int")
    
    # print(t1)
    print(t2)
    
    print("*"*100)
    
    # Select one row.
    # print(t2[2])
    
    # Select several consecutive rows.
    # print(t2[2:])
    
    # Select several non-consecutive rows.
    # print(t2[[2,8,10]])
    
    # print(t2[1,:])
    # print(t2[2:,:])
    # print(t2[[2,10,3],:])
    
    # Select one column.
    # print(t2[:,0])
    
    # Select several consecutive columns.
    # print(t2[:,2:])
    
    # Select several non-consecutive columns.
    # print(t2[:,[0,2]])
    
    # Select by row and column: the value at the 3rd row, 4th column.
    # a = t2[2,3]
    # print(a)
    # print(type(a))
    
    # Select several rows and columns: 3rd to 5th row, 2nd to 4th column.
    # The result is the block where those rows and columns intersect.
    b = t2[2:5,1:4]
    # print(b)
    
    # Select several non-adjacent points.
    # The two index lists are paired element by element, so the selected
    # points are (0,0), (2,1) and (2,3).
    c = t2[[0,2,2],[0,1,3]]
    print(c)

    2、exer

    # coding=utf-8
    import numpy as np
    
    
    # print(t1)
    def fill_ndarray(t1):
        """Replace the NaN entries of a 2-D array with their column mean.

        For every column that contains NaN, each NaN is overwritten with the
        mean of the non-NaN values of that same column. A column that has no
        NaN, or that consists only of NaN (so there is nothing to average),
        is left untouched.

        Args:
            t1: 2-D float ndarray. It is modified in place.

        Returns:
            The same array object ``t1``, after filling.
        """
        for i in range(t1.shape[1]):  # visit every column
            temp_col = t1[:, i]  # slice of the current column; writes go to t1
            nan_mask = np.isnan(temp_col)
            # Skip when there is nothing to fill, or nothing to average:
            # taking the mean of an empty selection only yields a warning and
            # another NaN.
            if not nan_mask.any() or nan_mask.all():
                continue
            # Overwrite the NaN positions with the mean of the valid values.
            temp_col[nan_mask] = temp_col[~nan_mask].mean()
        return t1
    
    if __name__ == '__main__':
        # Demo: build a 4x6 float array holding 0..23, blank out part of the
        # second row with NaN, then print it before and after filling.
        t1 = np.arange(24).reshape((4, 6)).astype("float")
        t1[1, 2:] = np.nan
        print(t1)
        t1 = fill_ndarray(t1)
        print(t1)

    3、exer2

    # Exercise 2: histogram of the values in the last column of the US data file.
    import numpy as np
    from matplotlib import  pyplot as plt
    
    us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
    uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"
    
    # t1 = np.loadtxt(us_file_path,delimiter=",",dtype="int",unpack=True)
    t_us = np.loadtxt(us_file_path,delimiter=",",dtype="int")
    
    # Take the last column (the comment counts, according to the original note).
    t_us_comments = t_us[:,-1]
    
    # Keep only the values less than or equal to 5000.
    t_us_comments = t_us_comments[t_us_comments<=5000]
    
    print(t_us_comments.max(),t_us_comments.min())
    
    # Bin width.
    d = 50
    
    # Number of bins = value range divided by the bin width (integer division).
    bin_nums = (t_us_comments.max()-t_us_comments.min())//d
    
    # Plot.
    plt.figure(figsize=(20,8),dpi=80)
    
    plt.hist(t_us_comments,bin_nums)
    
    
    plt.show()

    exer2-2

    # Exercise 2-2: scatter plot of two columns of the GB data file.
    import numpy as np
    from matplotlib import  pyplot as plt
    
    us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
    uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"
    
    # t1 = np.loadtxt(us_file_path,delimiter=",",dtype="int",unpack=True)
    t_uk = np.loadtxt(uk_file_path,delimiter=",",dtype="int")
    
    # Keep only the rows whose second column (the like count, according to
    # the original note) is at most 500000.
    t_uk = t_uk[t_uk[:,1]<=500000]
    
    # Last column and second column of the remaining rows.
    t_uk_comment = t_uk[:,-1]
    t_uk_like = t_uk[:,1]
    
    
    plt.figure(figsize=(20,8),dpi=80)
    # x: second column, y: last column.
    plt.scatter(t_uk_like,t_uk_comment)
    
    plt.show()

    五、pandas

    1、为什么要学习pandas

    那么问题来了:numpy已经能够帮助我们处理数据,能够结合matplotlib解决我们数据分析的问题,那么pandas学习的目的在什么地方呢?
     
    numpy能够帮我们处理数值型数据,但是这还不够
    很多时候,我们的数据除了数值之外,还有字符串,还有时间序列等
    比如:我们通过爬虫获取到了存储在数据库中的数据
    比如:之前youtube的例子中除了数值之外还有国家的信息,视频的分类(tag)信息,标题信息等
    
    所以,numpy能够帮助我们处理数值,但是pandas除了处理数值之外(基于numpy),还能够帮助我们处理其他类型的数据

    2、

  • 相关阅读:
    java操作生成jar包 和写入jar包
    jboss配置jndi连接池
    windows 域的LDAP查询相关举例
    LDAP error Code 及解决方法
    HDU 6417
    CF1299D Around the World
    codechef Chef and The Colored Grid
    Educational Codeforces Round 82 (Rated for Div. 2)
    CF1237F Balanced Domino Placements
    CF1254E Send Tree to Charlie
  • 原文地址:https://www.cnblogs.com/di2wu/p/10204412.html
Copyright © 2020-2023  润新知