# coding: utf-8
"""Plot how many markers three genotyping data sets have on each chromosome.

Three delimited text files (PMRA marker list, wegene raw data, 23andme raw
data) are read, the rows are counted per value of their second column, and
the counts are drawn as a grouped bar chart with one group per chromosome
label (1-22, X, Y, MT).
"""
__author__ = 'similarface'

import array  # not used below; retained from the original import line
from collections import Counter
from collections import defaultdict  # not used below; retained from the original

import numpy as np

PMRA_PATH = '/Users/similarface/Documents/PMRA_Marker_List_Detailed2.txt'
WEGENE_PATH = '/Users/similarface/Documents/wegeneRawData.txt'
V23ANDME_PATH = '/Users/similarface/Documents/genome_wang_mian_Full_20151021231213.txt'

# Chromosome labels in plotting order: '1' .. '22', then 'X', 'Y', 'MT'.
CHROMOSOMES = [str(n) for n in range(1, 23)] + ['X', 'Y', 'MT']

# Width of a single bar; three bars are drawn side by side per chromosome.
BAR_WIDTH = 0.3


def count_markers_by_chromosome(path, header_prefix, skip_malformed=False,
                                delimiter=' '):
    """Count the data rows of a delimited text file per second-column value.

    Args:
        path: File to read.
        header_prefix: Lines starting with this text are ignored (header or
            comment lines).
        skip_malformed: When true, rows with fewer than three fields are
            skipped silently; otherwise such a row raises ``ValueError``.
        delimiter: Field separator.
            NOTE(review): the raw data files are presumably tab-separated;
            the single-space default reproduces the literal found in this
            script -- confirm against the real files.

    Returns:
        A ``collections.Counter`` mapping the second field of each row to the
        number of rows carrying it. Labels that never occur count as 0.

    Raises:
        ValueError: A row has fewer than three fields and ``skip_malformed``
            is false.
    """
    counts = Counter()
    # The context manager closes the file even if a row is rejected.
    with open(path, 'r') as handle:
        for line_number, line in enumerate(handle, 1):
            if line.startswith(header_prefix):
                continue
            fields = line.strip().split(delimiter)
            # A usable row must provide at least the first three columns.
            if len(fields) < 3:
                if skip_malformed:
                    continue
                raise ValueError('%s:%d: expected at least 3 fields, got %d'
                                 % (path, line_number, len(fields)))
            counts[fields[1]] += 1
    return counts


def plot_chromosome_distribution():
    """Read the three data sets and show the grouped per-chromosome bar chart."""
    # Imported here so the counting helper stays importable on machines
    # without a plotting backend.
    from matplotlib import pyplot as plt

    pmra_counts = count_markers_by_chromosome(PMRA_PATH, "affy_snp_id")
    # Only the wegene file tolerates short rows (best effort).
    wegene_counts = count_markers_by_chromosome(WEGENE_PATH, "#",
                                                skip_malformed=True)
    print(list(wegene_counts))
    v23andme_counts = count_markers_by_chromosome(V23ANDME_PATH, "#")

    plt.figure(figsize=(9, 6))
    positions = np.arange(len(CHROMOSOMES))
    series = (
        (pmra_counts, 'green'),
        (wegene_counts, 'blue'),
        (v23andme_counts, 'red'),
    )
    for slot, (counts, colour) in enumerate(series):
        # Counter yields 0 for a chromosome that a data set does not contain,
        # so a missing label becomes an empty bar instead of a KeyError.
        heights = np.array([counts[label] for label in CHROMOSOMES])
        # align='edge' pins the left-edge placement that this layout was
        # written for; matplotlib >= 2.0 centres bars by default.
        plt.bar(positions + slot * BAR_WIDTH, heights, width=BAR_WIDTH,
                align='edge', facecolor=colour, edgecolor='white')

    plt.xticks(positions, CHROMOSOMES)
    plt.legend(('PMRA', 'wegene', '23andme'))
    plt.xlabel("chr")
    plt.ylabel("Number of Markers")
    plt.title(u"PMRA wegene 23andme 各染色体分布")
    plt.show()


if __name__ == "__main__":
    plot_chromosome_distribution()
#coding:utf-8
# Draw a bar chart of six hard-coded marker counts and annotate every bar
# with its share (in percent) of a per-bar total.
__author__ = 'similarface'

import array  # not used below; retained from the original import line

import numpy as np

# Tick labels, one per bar.
# NOTE(review): presumably two frequency bins (0-0.05 and 0.05-1) for each of
# PMRA ("P"), wegene ("w") and 23andme ("23me") -- confirm with the author.
BIN_LABELS = ['P_0_0.05', 'P_0.05_1', 'w_0_0.05', 'w_0.05_1',
              '23me_0_0.05', '23me_0.05_1']

# Bar heights, in the same order as BIN_LABELS.
MARKER_COUNTS = [196416, 289003, 70031, 411185, 79632, 394154]

# Denominator used for each bar's percentage label.
# NOTE(review): looks like the total marker count of each platform -- confirm.
PLATFORM_TOTALS = [902676.0, 902676.0, 596768.0, 596768.0,
                   610565.0, 610565.0]


def percent_labels(counts, totals):
    """Return each count as a percentage of its total, e.g. ``'21.76%'``.

    Args:
        counts: Sequence of numbers.
        totals: Sequence of denominators, paired with ``counts`` by position.

    Returns:
        A list of strings with two decimals followed by ``%``.
    """
    return ['%.2f%%' % (count * 100.0 / total)
            for count, total in zip(counts, totals)]


def plot_frequency_distribution():
    """Show the bar chart of MARKER_COUNTS with percentage annotations."""
    # Imported here so percent_labels stays importable on machines without a
    # plotting backend.
    from matplotlib import pyplot as plt

    plt.figure(figsize=(9, 6))
    positions = np.arange(len(MARKER_COUNTS))
    heights = np.array(MARKER_COUNTS)

    # align='edge' keeps each bar spanning [x + 0.1, x + 0.4], so the text
    # placed at x + 0.25 sits over the middle of the bar; matplotlib >= 2.0
    # would otherwise centre the bar on x + 0.1.
    plt.bar(positions + 0.1, heights, width=0.3, align='edge',
            facecolor='green', edgecolor='white')
    plt.xticks(positions, BIN_LABELS)

    labels = percent_labels(MARKER_COUNTS, PLATFORM_TOTALS)
    for x, y, label in zip(positions, heights, labels):
        plt.text(x + 0.25, y + 0.1, label, ha='center', va='bottom')

    plt.xlabel(u"频率标志")
    plt.ylabel("Number of Markers")
    plt.title(u"PMRA wegene 23andme 频率分布")
    plt.show()


if __name__ == "__main__":
    plot_frequency_distribution()