• 大数据分析及应用 坏坏


    Python 数组与曲线绘制(一)

    • 第1关 使用函数值填充列表
    class Solution:
        def solve(self, s, e):
            """Sample the standard normal density at 41 evenly spaced points.

            The interval [s, e] is split into 40 equal steps and
            1/sqrt(2*pi) * exp(-x**2/2) is evaluated at every grid point.

            :type s, e: int, int
            :rtype: list
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import math

            step = (e - s) / 40
            values = []
            for k in range(41):
                point = s + k * step
                values.append(1 / math.sqrt(2 * math.pi) * math.exp(-0.5 * point ** 2))
            return values
            ##********** End **********#
    
    • 第2关 填充数组(循环版本)
    class Solution:
        def solve(self, s, e):
            """Fill a numpy array with standard normal density values using a loop.

            41 grid points are placed on [s, e]; each one is written into a
            pre-allocated array and the density is evaluated element by element.

            :type s, e: int, int
            :rtype: numpy.ndarray
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np

            count = 41
            grid = np.zeros(count)
            density = np.zeros(count)
            for k in range(count):
                grid[k] = s + k * (e - s) / 40
                density[k] = 1 / np.sqrt(2 * np.pi) * np.exp(-0.5 * grid[k] ** 2)
            return density
            ##********** End **********#
    
    • 第3关 填充数组(向量化版本)
    class Solution:
        def solve(self, s, e):
            """Evaluate the standard normal density on [s, e] without a Python loop.

            :type s, e: int, int
            :rtype xlist, ylist: numpy.array, numpy.array
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np

            grid = np.linspace(s, e, 41)
            # One vectorized expression replaces the per-element loop.
            density = 1 / np.sqrt(2 * np.pi) * np.exp(-0.5 * grid ** 2)
            return grid, density
            ##********** End **********#
    
    • 第4关 绘制函数
    class Solution:
        def solve(self, s, e):
            """Plot the standard normal density on [s, e] and save it as a PNG.

            The curve is sampled at 41 evenly spaced points, drawn with
            matplotlib and written to ``step4/stu_img/student.png``.

            :type s, e: int, int
            :rtype: None
            """
            # Add the code here according to the "programming requirements"
            #********** Begin *********#
            from matplotlib import pyplot as plt
            import math
            dx = (e - s) / 40
            xlist = [s + i * dx for i in range(41)]

            def f(x):
                return 1 / math.sqrt(2 * math.pi) * math.exp(-0.5 * x ** 2)

            ylist = [f(x) for x in xlist]
            plt.plot(xlist, ylist)
            ##********** End **********#
            # Save BEFORE showing: on an interactive backend the figure is
            # discarded once the window closes, so saving afterwards would
            # write an empty image.
            plt.savefig("step4/stu_img/student.png")
            plt.show()
    
    • 第5关 函数作用于向量
    class Solution:
        def solve_1(self, v):
            """Apply x**3 + x*exp(x) + 1 to every element of a list (pure Python).

            :type v: list
            :rtype: list
            """
            # Add the code here according to the "programming requirements"
            #********** Begin *********#
            import math

            results = []
            for item in v:
                results.append(item ** 3 + item * math.exp(item) + 1)
            return results
            ##********** End **********#
        def solve_2(self, v):
            """Apply x**3 + x*exp(x) + 1 to a list using a vectorized numpy expression.

            :type v: list
            :rtype: numpy.array
            """
            # Add the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np

            arr = np.array(v)
            cubic_term = arr ** 3
            exp_term = arr * np.exp(arr)
            return cubic_term + exp_term + 1
            ##********** End **********#
    
    • 第6关 手工模拟执行向量表达式
    class Solution:
        def solve_1(self, x, t):
            """Compute cos(sin(x_i)) + exp(1/t_i) pairwise with the math module.

            Pairs are formed with ``zip``, so the result has the length of the
            shorter input.

            :type x, t: list, list
            :rtype: list
            """
            # Add the code here ("programming requirements": implement with the math library)
            #********** Begin *********#
            import math

            return [math.cos(math.sin(xv)) + math.exp(1 / tv) for xv, tv in zip(x, t)]
            ##********** End **********#
        def solve_2(self, x, t):
            """Compute cos(sin(x)) + exp(1/t) element-wise with numpy.

            :type x, t: list, list
            :rtype: numpy.array
            """
            # Add the code here ("programming requirements": implement with the numpy library)
            #********** Begin *********#
            import numpy as np

            trig_part = np.cos(np.sin(x))
            exp_part = np.exp(1 / np.array(t))
            return trig_part + exp_part
            ##********** End **********#
    

    Python 数组与曲线绘制(二)

    • 第1关 展示数组切片
    [0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.  1.1 1.2 1.3 1.4 1.5 1.6 1.7
     1.8 1.9 2.  2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. ]
    [0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.  1.1 1.2 1.3 1.4 1.5 1.6 1.7
     1.8 1.9 2.  2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. ]
    [0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.  1.1 1.2 1.3 1.4 1.5 1.6 1.7
     1.8 1.9 2.  2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8]
    [0.  0.5 1.  1.5 2.  2.5 3. ]
    [0.2 0.8 1.4 2.  2.6]
    
    • 第2关 绘制公式
    class Solution:
        def solve(self, v0, g):
            """Plot the height y(t) = v0*t - g*t**2/2 and save it as a PNG.

            Time runs from 0 to 2*v0/g in 50 samples; the figure is written to
            ``step2/stu_img/student.png``.

            :type v0, g: int, int
            :rtype: None
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np
            from matplotlib import pyplot as plt
            # Abscissa: time samples
            x = np.linspace(0.0, 2*v0/g, 50)
            # Ordinate: height at each time sample
            y = v0*x-1/2*g*x*x
            # Draw the curve
            plt.plot(x, y)
            # Label the horizontal axis
            plt.xlabel('time(s)')
            # Label the vertical axis
            plt.ylabel('height(m)')
            ##********** End **********#
            # Save BEFORE showing: on an interactive backend the figure is
            # discarded once the window closes, so saving afterwards would
            # write an empty image.
            plt.savefig("step2/stu_img/student.png")
            plt.show()
    
    • 第3关 绘制多参数公式
    class Solution:
        def solve(self, v0):
            """Plot y(t) = v*t - g*t**2/2 for every initial velocity in ``v0``.

            Each curve spans 0..2*v/g with 50 samples (g = 9.81); all curves
            share one figure, saved to ``step3/stu_img/student.png``.

            :type v0: List[int]
            :rtype: None
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np
            from matplotlib import pyplot as plt
            g = 9.81
            for v in v0:
                t = np.linspace(0, 2.0*v/g, 50)
                y = v*t - 0.5*g*t**2
                plt.plot(t, y)
            plt.xlabel('time(s)')
            plt.ylabel('height(m)')
            ##********** End **********#
            # Save BEFORE showing: on an interactive backend the figure is
            # discarded once the window closes, so saving afterwards would
            # write an empty image.
            plt.savefig("step3/stu_img/student.png")
            plt.show()
    
    • 第4关 指定图中轴的范围
    class Solution:
        def solve(self, v0):
            """Plot y(t) for several initial velocities with explicit axis limits.

            The x axis ends at the largest time sampled over all curves and the
            y axis at 1.1 times the largest height, so every curve fits.
            The figure is saved to ``step4/stu_img/student.png``.

            :type v0: List[int]
            :rtype: None
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np
            from matplotlib import pyplot as plt

            g = 9.81
            t_limit = 0
            y_limit = 0
            for v in v0:
                t = np.linspace(0, 2.0*v/g, 50)
                y = v*t - 0.5*g*t**2
                # Track the overall extents with numpy reductions instead of
                # calling the builtin max() twice per array.
                t_limit = max(t_limit, t.max())
                y_limit = max(y_limit, y.max())
                plt.plot(t, y)

            plt.axis([0, t_limit, 0, y_limit*1.1])
            plt.xlabel('time(s)')
            plt.ylabel('height(m)')

            ##********** End **********#
            # Save BEFORE showing: on an interactive backend the figure is
            # discarded once the window closes, so saving afterwards would
            # write an empty image.
            plt.savefig("step4/stu_img/student.png")
            plt.show()
    
    • 第5关 绘制精确和不精确的华氏-摄氏转换公式
    class Solution:
        def solve(self, s, e):
            """Plot the approximate and the exact Fahrenheit-to-Celsius formulas.

            For 50 Fahrenheit values on [s, e] the approximation (F - 30) / 2 is
            drawn as red dots and the exact (F - 32) * 5 / 9 as a blue line.
            The figure is saved to ``step5/stu_img/student.png``.

            :type s, e: int, int
            :rtype: None
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np
            from matplotlib import pyplot as plt
            f = np.linspace(s, e, 50)
            c1 = (f - 30) / 2
            c2 = (f - 32) * 5 / 9
            plt.plot(f, c1, 'r.', f, c2, 'b-')
            ##********** End **********#
            # Save BEFORE showing: on an interactive backend the figure is
            # discarded once the window closes, so saving afterwards would
            # write an empty image.
            plt.savefig("step5/stu_img/student.png")
            plt.show()
    
    • 第6关 绘制球的轨迹
    class Solution:
        def solve(self, y0, theta, v0):
            """Plot a ball's trajectory y(x) from launch until it reaches y = 0.

            ``theta`` is converted from degrees to radians. The trajectory is
            the parabola a*x**2 + b*x + c with a = -g / (2*v0**2*cos(theta)**2),
            b = tan(theta), c = y0 and g = 9.81; its larger root is used as the
            right end of the plotted range. The figure is saved to
            ``step6/stu_img/student.png``.

            :type y0, theta, v0: int, int, int
            :rtype: None
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            import numpy as np
            from matplotlib import pyplot as plt
            g = 9.81
            theta = theta/180.0*np.pi
            a = -1/(2*v0**2)*g/(np.cos(theta)**2)
            b = np.tan(theta)
            c = y0
            delta = np.sqrt(b**2-4*a*c)
            # Only the larger root is needed (the unused smaller one is no
            # longer stored): it is where the ball lands.
            xmax = max((-b-delta)/(2*a), (-b+delta)/(2*a))
            x = np.linspace(0, xmax, 51)
            y = x*np.tan(theta)-1/(2*v0**2)*g*(x**2)/(np.cos(theta)**2)+y0
            plt.plot(x, y)
            plt.axis([x.min(), x.max(), 0, y.max()*1.1])
            ##********** End **********#
            # Save BEFORE showing: on an interactive backend the figure is
            # discarded once the window closes, so saving afterwards would
            # write an empty image.
            plt.savefig("step6/stu_img/student.png")
            plt.show()
    
    • 第7关 绘制文件中的双列数据
    class Solution:
        def solve(self, file):
            """Read two whitespace-separated columns from ``file`` and plot them.

            The first column is used as x and the second as y. The mean,
            maximum and minimum of y are printed, then the curve is saved to
            ``step7/stu_img/student.png``.

            Raises ZeroDivisionError if the file contains no data rows, because
            the mean divides by the number of rows.

            :type file: str
            :rtype: None
            """
            # Fill in the code here according to the "programming requirements"
            #********** Begin *********#
            from matplotlib import pyplot as plt
            x, y = [], []
            # The context manager closes the file even if a row fails to parse.
            with open(file, 'r') as ifile:
                for line in ifile:
                    a = line.split()
                    if not a:
                        # Skip blank lines (e.g. a trailing newline at EOF).
                        continue
                    x.append(float(a[0]))
                    y.append(float(a[1]))
            print(sum(y)/len(y), max(y), min(y))
            plt.plot(x, y)
            ##********** End **********#
            # Save BEFORE showing: on an interactive backend the figure is
            # discarded once the window closes, so saving afterwards would
            # write an empty image.
            plt.savefig("step7/stu_img/student.png")
            plt.show()
    

    Python 数组与曲线绘制(三)

    • 第1关 绘图函数 - 绘制 sin 函数
    # 请绘制sin函数曲线
    
    import matplotlib
    matplotlib.use("Agg") # 设置平台绘图环境,勿删
    
    import matplotlib.pyplot as plt
    # 请在此添加代码实现函数细节   #
    # ********** Begin *********#
    # Angles in degrees, every 30 degrees from 0 to 360 inclusive.
    x = list(range(0, 361, 30))
    # Tabulated sine values for the angles above.
    y = [
        0, 0.5, 0.866, 1, 0.866, 0.5, 0,
        -0.5, -0.866, -1, -0.866, -0.5, 0,
    ]
    plt.plot(x, y, '.')  # draw the samples as individual dots
    plt.show()
    # ********** End **********#
    plt.savefig('picture/step0/fig0.png')  # stores the output image, do not delete
    
    • 第2关 绘图与保存 - 抛物线函数曲线
    # 请绘制抛物线曲线
    import matplotlib
    matplotlib.use("Agg")
    
    def f(x):
        """Evaluate the parabola 3*x**2 + 2*x + 1.

        The previous body overwrote ``x`` and then indexed an undefined name
        ``v``, so every call raised NameError. The function now evaluates the
        polynomial on the argument it is given.

        :param x: a single number, or an iterable of numbers
        :return: a list of values when ``x`` is iterable, otherwise one value
        """
        # Add code here to implement the function details
        # ********** Begin1 *********#
        def parabola(value):
            return 3 * value ** 2 + 2 * value + 1

        try:
            points = iter(x)
        except TypeError:
            # Not iterable: treat the argument as a single number.
            return parabola(x)
        return [parabola(value) for value in points]
        # ********** End1 **********#
    
    #   请在此添加代码绘制曲线并存储图像#
    # ********** Begin2 *********#
    import matplotlib.pyplot as plt
    # Integer abscissas 0..50 and the parabola 3*x**2 + 2*x + 1 evaluated on them.
    x = list(range(0, 51, 1))
    y = [3*(value**2) + 2*(value) + 1 for value in x]
    plt.plot(x, y, 'r--')  # red dashed line
    plt.show()
    plt.savefig('picture/step1/fig1.png')
    # ********** End2 **********#
    
    • 第3关 数组计算与向量化处理 - 函数曲线绘制与坐标处理
    # 请绘制函数曲线
    import matplotlib
    matplotlib.use("Agg")
    #   请在此添加实现代码   #
    # ********** Begin *********#
    import numpy as np
    import matplotlib.pyplot as plt
    # Sample y = t**2 * exp(-t**2) at 50 points on [0, 3] and save the curve.
    t = np.linspace(0, 3, 50)
    t_squared = t**2
    y = t_squared*np.exp(-t_squared)
    plt.plot(t, y)
    plt.show()
    plt.savefig('picture/step2/fig2.png')
    # ********** End **********#
    
    • 第4关 图例与坐标设置 - 绘制多条曲线
    #请在同一坐标系中绘制两条曲线
    import matplotlib
    matplotlib.use("Agg")
    
    #   请在此添加实现代码   #
    # ********** Begin *********#
    import numpy as np
    import matplotlib.pyplot as plt
    # Two curves on a shared time axis: t**2*exp(-t**2) and t**4*exp(-t**2).
    t = np.linspace(0, 3, 50)
    y1 = t**2*np.exp(-t**2)
    y2 = t**4*np.exp(-t**2)
    # First curve red dashed, second curve blue with circle markers.
    for series, line_format in ((y1, 'r--'), (y2, 'b-o')):
        plt.plot(t, series, line_format)
    plt.title('Plotting two curves in the same plot')
    plt.xlabel('t')
    plt.ylabel('y')
    plt.legend(['y1', 'y2'])
    plt.savefig('picture/step3/fig3.png')
    # ********** End **********#
    
    • 第5关 向量化处理 - 绘制函数图形
    # 请编写代码实现向量化帽函数并绘制函数曲线
    import matplotlib
    matplotlib.use("Agg")
    #   请在此添加实现代码   #
    # ********** Begin *********#
    import numpy as np
    import matplotlib.pyplot as plt
    def H3(x):
        """Vectorized hat function: 0 for x < 0, x on [0, 1), 2 - x on [1, 2), else 0."""
        # Build the nested selection from the innermost branch outwards.
        falling_or_zero = np.where(x < 2, 2 - x, 0)
        rising_or_later = np.where(x < 1, x, falling_or_zero)
        return np.where(x < 0, 0, rising_or_later)
    # Evaluate the hat function on 1000 points of [-3, 5] and save the plot.
    x = np.linspace(-3, 5, 1000)
    plt.title('Plotting hat func in this plot')
    plt.plot(x, H3(x), 'b-')
    y = H3(x)  # kept as a module-level name, as in the original script
    plt.show()
    plt.savefig('picture/step4/fig4.png')
    # ********** End **********#
    

    Python 绘图进阶

    • 第1关 柱状图 - 商品房销售价格统计图
    # 请编写代码绘制住宅商品房平均销售价格柱状图
    import matplotlib
    matplotlib.use("Agg")
    #  请在此添加实现代码  #
    # ********** Begin *********#
    import matplotlib.pyplot as plt
    from numpy import *
    # Year labels and prices are listed newest-first in the raw strings.
    xstring = '2015 2014 2013 2012 2011     \
               2010 2009 2008 2007 2006     \
               2005 2004 2003 2002 2001    2000'
    ystring = '12914 11826 12997 12306.41 12327.28 \
                11406 10608    8378 8667.02 8052.78 \
                6922.52    5744 4196 4336 4588    4751'
    # Reverse both series so the bars run from the oldest to the newest year.
    y = [float(token) for token in ystring.split()[::-1]]
    xlabels = xstring.split()[::-1]
    x = range(len(xlabels))
    plt.xticks(x, xlabels, rotation=45)
    plt.yticks(range(4000, 13500, 1000))
    plt.ylim(4000, 13500)
    plt.bar(x, y, color='#800080')
    plt.savefig('picture/step1/fig1.png')
    # ********** End **********#
    
    • 第2关 并列柱状图 - 商品房销售价格统计图
    # -*- coding: utf-8 -*-
    import matplotlib
    import re
    matplotlib.use("Agg")
     
    import matplotlib.pyplot as plt
    import numpy as np
     
    xstring = '2015 2014 2013 2012 2011     \
               2010 2009 2008 2007 2006     \
               2005 2004 2003 2002 2001    2000' #x轴标签
     
    n = 6
    ystring = ['']*n #y轴对应的6组数据
    ystring[0] = '6793    6324    6237    5790.99    5357.1    5032    4681    3800    3863.9    3366.79    3167.66    2778    2359    2250    2170    2112'
    ystring[1] = '6473    5933    5850    5429.93    4993.17    4725    4459    3576    3645.18    3119.25    2936.96    2608    2197    2092    2017    1948'
    ystring[2] = '15157    12965    12591    11460.19    10993.92    10934    9662    7801    7471.25    6584.93    5833.95    5576    4145    4154    4348    4288'
    ystring[3] = '12914    11826    12997    12306.41    12327.28    11406    10608    8378    8667.02    8052.78    6922.52    5744    4196    4336    4588    4751'
    ystring[4] = '9566    9817    9777    9020.91    8488.21    7747    6871    5886    5773.83    5246.62    5021.75    3884    3675.14    3488.57    3273.53    3260.38'
    ystring[5] = '4845    5177    4907    4305.73    4182.11    4099    3671    3219    3351.44    3131.31    2829.35    2235    2240.74    1918.83    2033.08    1864.37'
     
    labels = ['Commercial housing', 'Residential commercial housing',
              'high-end apartments', 'Office Building', 'Business housing', 'Others'] #图例标签
    colors = ['#ff7f50', '#87cefa', '#DA70D6', '#32CD32', '#6495ED', '#FF69B4'] #指定颜色
     
    #  请在此添加实现代码  #
    # ********** Begin *********#
    # Year labels, reversed so the axis runs from the oldest to the newest year.
    x_labels = list(reversed(re.findall(r'\b\d+\b', xstring)))
    # One reversed list of floats per housing category (y.split() would work as well).
    ylist = []
    for y in ystring:
        numbers = [float(token) for token in re.findall(r'[0-9]+\.?[0-9]*', y)]
        numbers.reverse()
        ylist.append(numbers)

    bar_width = 0.8
    # Left edge of each year's group: 1, 7, ..., 91.
    xindex = np.arange(1, 92, 6)

    fig, ax = plt.subplots()
    # Shift each category by one bar width so the six bars sit side by side.
    for i in range(6):
        offset = bar_width * i
        ax.bar(xindex + offset, ylist[i], bar_width, color=colors[i])

    ax.set_xlim(-1, 98)  # closed interval
    # Centre each tick under its group of six bars.
    plt.xticks(xindex + bar_width * 2.5, x_labels, rotation=45)
    ax.set_ylim(1450, 15300)
    plt.yticks(np.arange(2000, 16000, 2000))
    plt.legend(labels, loc='upper left')
    plt.title('Selling Prices of Six Types of Housing')

    plt.savefig('picture/step2/fig2.png')
     
    # ********** End **********#
    
    • 第3关 饼状图 - 2010 全国人口普查数据分析
    # 请绘制育龄妇女的受教育程度分布饼图
    import matplotlib
    matplotlib.use("Agg")
    #  请在此添加实现代码  #
    # ********** Begin *********#
    import matplotlib.pyplot as plt
    # Education levels (wedge labels)
    labels = ['none', 'primary', 'junior', 'senior', 'specialties', 'bachelor', 'master']
    # Number of women per education level, in the same order as the labels
    womenCount = [2052380, 11315444, 20435242, 7456627, 3014264, 1972395, 185028]
    # Wedge colours
    colors = ['red','orange','yellow','green','purple','blue','black']
    # Pull the third wedge ('junior') out of the pie
    explode = [0,0,0.1,0,0,0,0]
    plt.pie(
        womenCount,
        explode=explode,
        labels=labels,
        shadow=True,
        colors=colors,
    )
    plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
    plt.savefig('picture/step3/fig3.png')
    # ********** End **********#
    
    • 第4关 多子图绘制 - 2010 全国人口普查数据分析
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import numpy as np
    labels = ['none', 'primary', 'junior', 'senior', 'specialties', 'bachelor', 'master'] # 标签
    womenCount = [2052380, 11315444, 20435242, 7456627, 3014264, 1972395, 185028]
    birthMen = [2795259, 12698141, 13982478, 2887164, 903910, 432333, 35915]
    birthWomen = [2417485, 11000637, 11897674, 2493829, 786862, 385718, 32270]
    liveMen = [2717613, 12477914, 13847346, 2863706, 897607, 429809, 35704]
    liveWomen = [2362007, 10854232, 11815939, 2480362, 783225, 384158, 32136]
    #  请在此添加实现代码  #
    # ********** Begin *********#
    # Tick positions, one per education level.
    x = np.arange(len(labels))
    # Totals over both sexes.
    birth = np.array(birthMen) + np.array(birthWomen)
    live = np.array(liveMen) + np.array(liveWomen)
    plt.figure(figsize=[14,5])  # canvas size

    # Left panel: `live` divided by the number of women per level.
    # NOTE(review): the name says "birthrate" but the numerator is `live`,
    # not `birth` - confirm this is what the exercise expects.
    plt.subplot(121)
    birthrate = (1.0*live) / (1.0*np.array(womenCount))
    plt.plot(x, birthrate, 'r')
    plt.xticks(x, labels)

    # Right panel: `live` as a percentage of `birth`.
    plt.subplot(122)
    liverate = (1.0*live) / (1.0*birth) * 100
    plt.plot(x, liverate, 'b')
    plt.xticks(x, labels)

    plt.savefig('picture/step4/fig4.png')
    # ********** End **********#
    

    Python数据可视化之折线图

    • 第1关 折线图的绘制与优化
    # -*- coding: utf-8 -*-
    import pandas as pd #用于生成满足绘图要求的数据格式
    import numpy as np #用于展示横坐标
    from matplotlib import pyplot as plt #用于绘制折线图
    
    population = pd.read_csv(r"LineChart/level1/csv/world-population.csv") #返回值为二维标记数据结构 DataFrame
    def plot():
        """Draw the world population line chart and save it as a PNG.

        Reads the ``Year`` and ``Population`` columns of the module-level
        ``population`` DataFrame and writes the figure to
        ``LineChart/level1/studentanswer/world-population.png``.
        """
        # ********* Begin *********#
        fig, ax = plt.subplots()
        my_x_ticks = np.arange(1960, 2011, 5)
        plt.xticks(my_x_ticks)
        # Pass the on/off flag positionally: the ``b`` keyword was renamed to
        # ``visible`` in Matplotlib 3.5 and later removed, so ``b=True`` fails
        # on current releases while the positional form works everywhere.
        plt.grid(True, color='r', linestyle='--', linewidth=1, alpha=0.3, axis='x', which="major")  # grid lines
        ax.plot(population["Year"], population["Population"], linewidth=1, c='#00CC88', marker='*', markersize=4)  # points and line
        ax.set_xlabel("Year", fontsize=12)  # x-axis label
        ax.set_ylabel("Population", fontsize=12)  # y-axis label
        # ********* End *********#
        plt.savefig('LineChart/level1/studentanswer/world-population.png')  # save as PNG
        plt.close()  # close the figure
    

    Python数据可视化之柱形图

    • 第1关 “大胃王”比赛数据柱形图绘制——绘制柱形图的基本步骤
    # -*- coding: utf-8 -*-
    import pandas as pd
    from matplotlib import pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages 
    hot_dog = pd.read_csv(r"matplotlib_bar/csv/hot-dog-contest-winners.csv")
    
    def plot():
        """Draw a bar chart of hot dogs eaten per year and save it as a PNG.

        Reads the ``Year`` and ``Dogs eaten`` columns of the module-level
        ``hot_dog`` DataFrame and writes the figure to
        ``matplotlib_bar/studentfile/studentanswer/level_1/US.png``.
        """
        # ********* Begin *********#
        fig, ax = plt.subplots()  # subplots returns the figure and the axes
        ax.bar(hot_dog["Year"], hot_dog["Dogs eaten"])  # first argument is x, second is y

        # ********* End *********#
        # Save BEFORE showing: on an interactive backend the figure is
        # discarded once the window closes, so saving afterwards would write
        # an empty image.
        plt.savefig('matplotlib_bar/studentfile/studentanswer/level_1/US.png')
        plt.show()
        plt.close()
    
    • 第2关 “大胃王”比赛数据柱形图绘制——柱形图展示优化
    # -*- coding: utf-8 -*-
    import pandas as pd
    from matplotlib import pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages 
    hot_dog = pd.read_csv(r"matplotlib_bar/csv/hot-dog-contest-winners.csv")
    
    def plot():
        """Draw the styled hot dog contest bar chart and save it as a PNG.

        Bars are coloured by ``unitedStatesColor()``; axis labels, a title and
        x limits are added. The figure is written to
        ``matplotlib_bar/studentfile/studentanswer/level_2/US.png``.
        """
        # ********* Begin *********#
        fig, ax = plt.subplots()
        ax.bar(hot_dog["Year"], hot_dog["Dogs eaten"], width=[0.6], color=unitedStatesColor())
        # Set once (the original assigned the same value twice). Changing the
        # rcParam after the figure exists does not resize this figure; it only
        # sets the default for figures created later.
        plt.rcParams['figure.figsize'] = (8.0, 4.0)
        ax.set_xlabel("Year")  # x-axis label
        ax.set_ylabel("Dogs Eaten")  # y-axis label
        ax.set_title("Hotdog game scores 1980-2010")  # title
        ax.set_xlim(1979, 2011)
        # ********* End *********#
        # Save BEFORE showing: on an interactive backend the figure is
        # discarded once the window closes, so saving afterwards would write
        # an empty image.
        plt.savefig('matplotlib_bar/studentfile/studentanswer/level_2/US.png')
        plt.show()
        plt.close()
    
    def unitedStatesColor():
        """Return one bar colour per row of ``hot_dog``, keyed on the country.

        Rows whose ``Country`` is "United States" get pink (``#DB7093``); all
        other rows get grey-green (``#5F9F9F``).

        :rtype: list
        """
        # ********* Begin *********#
        # A comprehension avoids the local variable that shadowed the builtin
        # ``list`` in the original.
        return [
            "#DB7093" if country == "United States" else "#5F9F9F"
            for country in hot_dog["Country"]
        ]

        # ********* End *********#
    

    Python数据可视化之散点图

    • 第1关 美国犯罪率数据散点图绘制——散点图的基本绘制步骤
    # -*- coding: utf-8 -*-
    import pandas as pd #用于生成满足绘图要求的数据格式
    from matplotlib import pyplot as plt #用于绘制散点图
    import statsmodels.api as sm #用于局部加权回归
    from matplotlib.backends.backend_pdf import PdfPages
    crime=pd.read_csv(r"matplotlibScatter/csv/crimeRatesByState2005.csv") #返回值为二维标记数据结构 DataFrame
    def plot():
        """Draw the murder vs. burglary scatter plot and save it as a PNG.

        Rows for 'District of Columbia' and 'United States' are removed from
        the module-level ``crime`` DataFrame before plotting. The figure is
        written to ``matplotlibScatter/studentanswer/level_1/crime.png``.
        """
        # ********* Begin *********#
        fig, ax = plt.subplots()  # subplots returns the figure and the axes
        # Drop the national average row and the District of Columbia row.
        crime2 = crime[~crime['state'].isin(['District of Columbia', 'United States'])]
        ax.plot(crime2["murder"], crime2["burglary"], "*", color="#00CC88")
        ax.set_xlabel("crime murder", fontsize=12)  # x-axis label
        ax.set_ylabel("crime burglary", fontsize=12)  # y-axis label
        ax.set_xlim(0, 10)  # x axis from 0 to 10
        ax.set_ylim(0, 1200)  # y axis from 0 to 1200

        # ********* End *********#
        # Save BEFORE showing: on an interactive backend the figure is
        # discarded once the window closes, so saving afterwards would write
        # an empty image.
        plt.savefig('matplotlibScatter/studentanswer/level_1/crime.png')  # save as PNG
        plt.show()
        plt.close()  # close the figure
    
    • 第2关 美国犯罪率数据散点图绘制——局部加权回归
    # -*- coding: utf-8 -*-
    import pandas as pd #用于生成满足绘图要求的数据格式
    from matplotlib import pyplot as plt#用于绘制散点图
    import statsmodels.api as sm #用于局部加权回归
    
    crime=pd.read_csv(r"matplotlibScatter/csv/crimeRatesByState2005.csv") #返回值为二维标记数据结构 DataFrame
    def plot():
        """Draw the murder vs. burglary scatter plot with a LOWESS curve.

        Rows for 'District of Columbia' and 'United States' are removed from
        the module-level ``crime`` DataFrame, a locally weighted regression of
        burglary on murder is drawn over the points, and the figure is written
        to ``matplotlibScatter/studentanswer/level_2/crime.png``.
        """
        # ********* Begin *********#

        # The original first called plt.figure(figsize=(8,4)); that created a
        # separate empty figure that was never drawn on or closed. It is
        # dropped here, so the saved image keeps the default size it already
        # had.
        fig, ax = plt.subplots()
        crime2 = crime[~crime['state'].isin(['District of Columbia', 'United States'])]
        lowess = sm.nonparametric.lowess(crime2["burglary"], crime2["murder"])
        # The two columns of the result are used as the x and y of the curve.
        ax.plot(lowess[:, 0], lowess[:, 1])
        ax.plot(crime2["murder"], crime2["burglary"], "*", color="#00CC88")
        ax.set_xlabel("crime murder", fontsize=12)
        ax.set_ylabel("crime burglary", fontsize=12)
        ax.set_title("美国谋杀率和入室盗窃率", fontproperties="SimHei", fontsize=16)
        ax.set_xlim(0, 10)
        ax.set_ylim(0, 1200)

        # ********* End *********#
        # Save BEFORE showing: on an interactive backend the figure is
        # discarded once the window closes, so saving afterwards would write
        # an empty image.
        plt.savefig('matplotlibScatter/studentanswer/level_2/crime.png')  # save as PNG
        plt.show()
        plt.close()  # close the figure
    

    Python数据可视化之多维量法(MDS)

    • 第1关 美国国家教育统计中心数据——降维
    # -*- coding: utf-8 -*-
    import pandas as pd #用于生成满足绘图要求的数据格式
    from sklearn.manifold import MDS #用于MDS降维
    import matplotlib.pyplot as plt #用于绘制散点图
    from sklearn.cluster import KMeans #用于Kmeans聚类
    from scipy.spatial import distance #用于计算获取距离矩阵
    edu=pd.read_csv(r"MDS/csv/education.csv") #读取csv数据,返回值为二维标记数据结构 DataFrame
    def plot():
        """Project the education data to 2-D with MDS and save a labelled scatter plot.

        Columns 1..6 of the module-level ``edu`` DataFrame are turned into a
        Euclidean distance matrix, embedded in two dimensions, and every point
        is annotated with the value from column 0. The figure is written to
        ``MDS/studentanswer/level_1/education.png``.
        """
        # ********* Begin *********#
        features = edu.iloc[:, 1:7]  # columns at positions 1 through 6
        pairwise = distance.pdist(features, metric="euclidean")
        dist_matrix = distance.squareform(pairwise)  # square distance matrix
        reducer = MDS(n_components=2, dissimilarity="precomputed")
        coords = reducer.fit_transform(dist_matrix)
        fig, ax = plt.subplots()
        ax.scatter(coords[:, 0], coords[:, 1])
        row_names = list(edu.iloc[:, 0])
        for idx, row_name in enumerate(row_names):
            plt.annotate(row_name, xy=(coords[:, 0][idx], coords[:, 1][idx]), xytext=(-20, 5), textcoords='offset points')
        # ********* End *********#
        plt.savefig("MDS/studentanswer/level_1/education.png")
        plt.close()
    
    • 第2关 美国国家教育统计中心数据——分别按特征和聚类结果着色
    # -*- coding: utf-8 -*-
    import pandas as pd #用于生成满足绘图要求的数据格式
    from sklearn.manifold import MDS #用于MDS降维
    import matplotlib.pyplot as plt #用于绘制散点图
    from sklearn.cluster import KMeans #用于Kmeans聚类
    from scipy.spatial import distance #用于计算获取距离矩阵
    edu=pd.read_csv(r"MDS/csv/education.csv") #读取csv数据,返回值为二维标记数据结构 DataFrame
    def plot():
        """Embed the education data with MDS and colour points by reading score.

        Columns 1..6 of the module-level ``edu`` DataFrame are turned into a
        Euclidean distance matrix and embedded in two dimensions. Rows whose
        ``reading`` value is below the column mean are pink (``#DB7093``), all
        others grey-green (``#5F9F9F``); each point is annotated with the value
        from column 0 in the same colour. The figure is written to
        ``MDS/studentanswer/level_2/education.png``.
        """
        # ********* Begin *********#
        edu_x = edu.iloc[:, 1:7]  # columns at positions 1 through 6
        DM_dist = distance.squareform(distance.pdist(edu_x, metric="euclidean"))  # distance matrix
        clf2 = MDS(n_components=2, dissimilarity="precomputed")
        edu_t2 = clf2.fit_transform(DM_dist)
        fig, ax = plt.subplots()
        reading = edu_x["reading"]
        average = sum(reading) / len(reading)  # mean reading score
        # Iterate over the values directly instead of indexing by label.
        # Below the mean -> pink; equal to or above the mean -> grey-green.
        reading_colors_list = [
            "#DB7093" if score < average else "#5F9F9F"
            for score in reading
        ]
        ax.scatter(edu_t2[:, 0], edu_t2[:, 1], color=reading_colors_list)
        names = list(edu.iloc[:, 0])  # first column holds the row names
        for i, name in enumerate(names):
            plt.annotate(name, xy=(edu_t2[i, 0], edu_t2[i, 1]), xytext=(-20, 5), textcoords='offset points', color=reading_colors_list[i])
        # ********* End *********#
        # Save BEFORE showing: on an interactive backend the figure is
        # discarded once the window closes, so saving afterwards would write
        # an empty image.
        plt.savefig("MDS/studentanswer/level_2/education.png")
        plt.show()
        plt.close()
    
  • 相关阅读:
    为 DropDownList 选项添加背景或样式
    杂七杂八——Name与x:Name的关系
    原创Godaddy帐户取消信用卡或PayPla绑定,防止无端扣费[图文教程]
    dota中名词解释
    在 JavaScript 中如何创建多行字符串(JavaScript Multiline String)
    HDU 4035 Maze(概率DP)
    获取想要得到的风格设计属性描述————Resources.Theme类函数public TypedArray obtainStyledAttributes()
    Android 异步更新UI —— Handler 【复杂的数据操作另起新线程然后在当前UI线程更新结果】
    超链接中文乱码问题
    java中的final变量
  • 原文地址:https://www.cnblogs.com/bad5/p/16303807.html
Copyright © 2020-2023  润新知