• OTU_Network&calc_otu


      1 # -*- coding: utf-8 -*-
      2 # __author__ = 'JieYap'
      3 from biocluster.agent import Agent
      4 from biocluster.tool import Tool
      5 import os
      6 import types
      7 import subprocess
      8 from biocluster.core.exceptions import OptionError
      9 
     10 
     11 class OtunetworkAgent(Agent):
     12     """
     13     需要calc_otu_network.py
     14     version 1.0
     15     author: JieYao
     16     last_modified:2016.8.1
     17     """
     18     
     19     def __init__(self, parent):
     20         super(OtunetworkAgent, self).__init__(parent)
     21         options = [
     22             {"name": "otutable", "type": "infile", "format": "meta.otu.otu_table, meta.otu.tax_summary_dir"},
     23             {"name": "level", "type": "string", "default": "otu"},
     24             {"name": "envtable", "type": "infile", "format": "meta.otu.group_table"},
     25             {"name": "envlabs", "type": "string", "default": ""}
     26             ]
     27         self.add_option(options)
     28         self.step.add_steps('OtunetworkAnalysis')
     29         self.on('start', self.step_start)
     30         self.on('end', self.step_end)
     31 
     32     def step_start(self):
     33         self.step.OtunetworkAnalysis.start()
     34         self.step.update()
     35         
     36     def step_end(self):
     37         self.step.OtunetworkAnalysis.finish()
     38         self.step.update()
     39         
     40     def gettable(self):
     41         """
     42         根据输入的otu表和分类水平计算新的otu表
     43         :return:
     44         """
     45         if self.option('otutable').format == "meta.otu.tax_summary_dir":
     46             return self.option('otutable').get_table(self.option('level'))
     47         else:
     48             return self.option('otutable').prop['path']
     49         
     50     def check_options(self):
     51         """
     52         重写参数检查
     53         """
     54         if not self.option('otutable').is_set:
     55             raise OptionError('必须提供otu表')
     56         self.option('otutable').get_info()
     57         if self.option('otutable').prop['sample_num'] < 2:
     58             raise OptionError('otu表的样本数目少于2,不可进行网络分析')
     59         if self.option('envtable').is_set:
     60             self.option('envtable').get_info()
     61             if self.option('envlabs'):
     62                 labs = self.option('envlabs').split(',')
     63                 for lab in labs:
     64                     if lab not in self.option('envtable').prop['group_scheme']:
     65                         raise OptionError('envlabs中有不在物种(环境因子)表中存在的因子:%s' % lab)
     66             else:
     67                 pass
     68             if len(self.option('envtable').prop['sample']) < 2:
     69                 raise OptionError('物种(环境因子)表的样本数目少于2,不可进行网络分析')
     70         samplelist = open(self.gettable()).readline().strip().split('	')[1:]
     71         if self.option('envtable').is_set:
     72             self.option('envtable').get_info()
     73             if len(self.option('envtable').prop['sample']) > len(samplelist):
     74                 raise OptionError('OTU表中的样本数量:%s少于物种(环境因子)表中的样本数量:%s' % (len(samplelist),
     75                                   len(self.option('envtable').prop['sample'])))
     76             for sample in self.option('envtable').prop['sample']:
     77                 if sample not in samplelist:
     78                     raise OptionError('物种(环境因子)表的样本中存在OTU表中未知的样本%s' % sample)
     79         table = open(self.gettable())
     80         if len(table.readlines()) < 4 :
     81             raise OptionError('数据表信息少于3行')
     82         table.close()
     83         return True
     84 
     85     def set_resource(self):
     86         """
     87         设置所需资源
     88         """
     89         self._cpu = 2
     90         self._memory = ''
     91         
     92     def end(self):
     93         result_dir = self.add_upload_dir(self.output_dir)
     94         result_dir.add_relpath_rules([
     95                 [".", "", "OTU网络分析结果输出目录"],
     96                 ["./real_node_table.txt", "txt", "OTU网络节点属性表"],
     97                 ["./real_edge_table.txt", "txt", "OTU网络边集属性表"],
     98                 ["./real_dc_otu_degree.txt", "txt", "OTU网络OTU节点度分布表"],
     99                 ["./real_dc_sample_degree.txt", "txt", "OTU网络sample节点度分布表"],
    100                 ["./real_dc_sample_otu_degree.txt", "txt", "OTU网络节点度分布表"],
    101                 ["./network_centrality.txt", "txt", "OTU网络中心系数表"],
    102                 ["./network_attributes.txt", "txt", "OTU网络单值属性表"],
    103                 ])
    104         print self.get_upload_files()
    105         super(OtunetworkAgent, self).end()
    106 
    107 
    108 class OtunetworkTool(Tool):
    109     def __init__(self, config):
    110         super(OtunetworkTool, self).__init__(config)
    111         self._version = "1.0.1"
    112         self.cmd_path = self.config.SOFTWARE_DIR + '/bioinfo/meta/scripts/calc_otu_network.py'
    113         self.env_table = self.get_new_env()
    114         self.otu_table = self.get_otu_table()
    115         self.out_files = ['real_node_table.txt', 'real_edge_table.txt', 'real_dc_otu_degree.txt', 'real_dc_sample_degree.txt', 'real_dc_sample_otu_degree.txt', 'network_centrality.txt', 'network_attributes.txt']
    116         
    117         
    118     def get_otu_table(self):
    119         """
    120         根据调用的level参数重构otu表
    121         :return:
    122         """
    123         if self.option('otutable').format == "meta.otu.tax_summary_dir":
    124             otu_path = self.option('otutable').get_table(self.option('level'))
    125         else:
    126             otu_path = self.option('otutable').prop['path']
    127         if self.option('envtable').is_set:
    128             return self.filter_otu_sample(otu_path, self.option('envtable').prop['sample'],
    129                                           os.path.join(self.work_dir, 'temp_filter.otutable'))
    130         else:
    131             return otu_path
    132     
    133     def filter_otu_sample(self, otu_path, filter_samples, newfile):
    134         if not isinstance(filter_samples, types.ListType):
    135             raise Exception('过滤otu表样本的样本名称应为列表')
    136         try:
    137             with open(otu_path, 'rb') as f, open(newfile, 'wb') as w:
    138                 one_line = f.readline()
    139                 all_samples = one_line.rstrip().split('	')[1:]
    140                 if not ((set(all_samples) & set(filter_samples)) == set(filter_samples)):
    141                     raise Exception('提供的过滤样本集合中存在otu表中不存在的样本all:%s,filter_samples:%s' % (all_samples, filter_samples))
    142                 if len(all_samples) == len(filter_samples):
    143                     return otu_path
    144                 samples_index = [all_samples.index(i) + 1 for i in filter_samples]
    145                 w.write('OTU	' + '	'.join(filter_samples) + '
    ')
    146                 for line in f:
    147                     all_values = line.rstrip().split('	')
    148                     new_values = [all_values[0]] + [all_values[i] for i in samples_index]
    149                     w.write('	'.join(new_values) + '
    ')
    150                 return newfile
    151         except IOError:
    152             raise Exception('无法打开OTU相关文件或者文件不存在')
    153 
    154     def get_new_env(self):
    155         """
    156         根据envlabs生成新的envtable
    157         """
    158         if self.option('envlabs'):
    159             new_path = self.work_dir + '/temp_env_table.xls'
    160             self.option('envtable').sub_group(new_path, self.option('envlabs').split(','))
    161             return new_path
    162         else:
    163             return self.option('envtable').path
    164 
    165     def run(self):
    166         """
    167         运行
    168         """
    169         super(OtunetworkTool, self).run()
    170         self.run_otu_network_py()
    171 
    172     def formattable(self, tablepath):
    173         alllines = open(tablepath).readlines()
    174         if alllines[0][0] == '#':
    175             newtable = open(os.path.join(self.work_dir, 'temp_format.table'), 'w')
    176             newtable.write(alllines[0].lstrip('#'))
    177             newtable.writelines(alllines[1:])
    178             newtable.close()
    179             return os.path.join(self.work_dir, 'temp_format.table')
    180         else:
    181             return tablepath
    182 
    183     def run_otu_network_py(self):
    184         """
    185         运行calc_otu_network.py
    186         """
    187         real_otu_path = self.formattable(self.otu_table)
    188         cmd = self.config.SOFTWARE_DIR + '/program/Python/bin/python '
    189         cmd += self.cmd_path
    190         cmd += ' -i %s -o %s' % (real_otu_path, self.work_dir + '/otu_network')
    191         if self.option('envtable').is_set:
    192             cmd += ' -m %s' % (self.env_table)
    193         self.logger.info('开始运行calc_otu_network生成OTU网络并进行计算')
    194         
    195         try:
    196             subprocess.check_output(cmd, shell=True)
    197             self.logger.info('OTU_Network计算完成')
    198         except subprocess.CalledProcessError:
    199             self.logger.info('OTU_Network计算失败')
    200             self.set_error('运行calc_otu_network.py失败')
    201         allfiles = self.get_filesname()
    202         for i in range(len(self.out_files)):
    203             self.linkfile(allfiles[i], self.out_files[i])
    204         self.end()
    205 
    206     def linkfile(self, oldfile, newname):
    207         """
    208         link文件到output文件夹
    209         :param oldfile: 资源文件路径
    210         :param newname: 新的文件名
    211         :return:
    212         """
    213         newpath = os.path.join(self.output_dir, newname)
    214         if os.path.exists(newpath):
    215             os.remove(newpath)
    216         os.link(oldfile, newpath)
    217 
    218     def get_filesname(self):
    219         files_status = [None, None, None, None, None, None, None]
    220         for paths,d,filelist in os.walk(self.work_dir + '/otu_network'):
    221             for filename in filelist:
    222                 name = os.path.join(paths, filename)
    223                 for i in range(len(self.out_files)):
    224                     if self.out_files[i] in name:
    225                         files_status[i] = name
    226         for i in range(len(self.out_files)):
    227             if not files_status[i]:
    228                 self.set_error('未知原因,结果文件生成出错或丢失')
    229         return files_status
    View Code
      1 # -*- coding: utf-8 -*-
      2 # __author__ = 'JieYao'
      3 import os
      4 import argparse
      5 from biocluster.config import Config
      6 import shutil
      7 import networkx
      8 
      9 def make_env_table(inFile, outFile):
     10     with open(inFile, "r") as tmp_file:
     11         samples_name = tmp_file.readline().rstrip().split('	')
     12     with open('group.txt' , "w") as tmp_file:
     13         tmp_file.write("#sample	group
    ")
     14         for i in range(1,len(samples_name)):
     15             tmp_file.write(samples_name[i]+"	STD
    ") 
     16     return './group.txt'
     17 
     18 parser = argparse.ArgumentParser(description='输入OTU表格,生成OTU网络信息')
     19 parser.add_argument('-i', "--otu_matrix", help="输入的OTU表", required = True)
     20 parser.add_argument('-o', "--output", help="输出文件位置", required = True)
     21 parser.add_argument('-m', "--env_table", help="样本分类表", required = False)
     22 args = vars(parser.parse_args())
     23 
     24 flag = False
     25 inFile = args["otu_matrix"]
     26 outFile = args["output"]
     27 if not args["env_table"]:
     28     env_table = make_env_table(inFile, outFile)
     29     flag = True
     30 else:
     31     env_table = args["env_table"]
     32 if os.path.exists(outFile):
     33     shutil.rmtree(outFile)
     34     
     35 """
     36 执行make_otu_network.py 计算otu网络的相关信息并生成文件
     37 完成后由于make_otu_network.py生成的是一个文件夹,使用os和shutil的命令将文件全部移动到输出路径下
     38 """
     39 command = Config().SOFTWARE_DIR + '/program/Python/bin/python '
     40 command += Config().SOFTWARE_DIR + '/program/Python/bin/make_otu_network.py'
     41 command += ' -i %s -o %s -m %s' %(inFile, outFile, env_table)
     42 os.system(command)
     43 if flag:
     44     os.remove("./group.txt")
     45 for paths,d,filelist in os.walk(outFile):
     46     for filename in filelist:
     47         name = os.path.join(paths, filename)
     48         if "reduced" in name:
     49             os.remove(name)
     50         elif "/otu_network/" in name:
     51             shutil.move(name, outFile)
     52 shutil.rmtree(outFile + '/otu_network')
     53 for paths,d,filelist in os.walk(outFile):
     54     for filename in filelist:
     55         name = os.path.join(paths, filename)
     56         if "props" in name:
     57             os.remove(name)
     58 
     59 """
     60 根据node表建立{节点名字---节点编号}的字典
     61 """
     62 node_name = [""]
     63 node_dictionary = {}
     64 with open(outFile + '/real_node_table.txt', "r") as node_file:
     65     informations = node_file.readlines()
     66     for i in range(1, len(informations)):
     67         tmp = informations[i].rstrip().split("	")
     68         node_dictionary[tmp[0]] = i
     69         node_name += [tmp[0]]
     70 """
     71 开始使用Networkx包建立图
     72 计算OTU网络的属性信息
     73 """
     74 G = networkx.Graph()
     75 with open(outFile + "/real_edge_table.txt" , "r") as edge_file:
     76     informations = edge_file.readlines()
     77     for i in range(1, len(informations)):
     78         tmp = informations[i].rstrip().split("	")
     79         G.add_edge(node_dictionary[tmp[0]], node_dictionary[tmp[1]], weight = eval(tmp[2]))
     80 
     81 
     82 """
     83 用实践测试单独对Sample或者是OTU构图的构图方法,
     84 结果证明这样的构图出来的结果基本上Sample是完全图,
     85 OTU单独构图的意义则不大,所以这种想法……失败了。
     86 """
     87 """
     88 H = networkx.Graph()
     89 with open(outFile + "/real_node_table.txt" , "r") as node_file:
     90     informations = node_file.readlines()
     91     for i in range(1,len(informations)):
     92         tmp = informations[i].rstrip().split("	")
     93         if tmp[2] == "otu_node":
     94             break
     95 position = i
     96 for i in range(position):
     97     for j in range(position):
     98         H.add_edge(i,j,weight=0)
     99         for k in range(position,len(G)):
    100             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
    101                 H.edge[i][j]['weight'] += min(G.edge[i][k]['weight']+G.edge[j][k]['weight'])
    102         if H.edge[i][j]['weight'] == 0:
    103             H.remove_edge(i,j)
    104 
    105 minx = 32767
    106 for i in range(position):
    107     for j in range(position):
    108         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    109             minx = min(minx, H.edge[i][j]['weight'])
    110 
    111 for i in range(position):
    112     for j in range(position):
    113         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    114             H.edge[i][j]['weight'] -= minx
    115             if H.edge[i][j]['weight'] <=0:
    116                 H.remove_edge(i,j)
    117 print H.edges()
    118 
    119 H = networkx.Graph()
    120 with open(outFile + "/real_node_table.txt" , "r") as node_file:
    121     informations = node_file.readlines()
    122     for i in range(1,len(informations)):
    123         tmp = informations[i].rstrip().split("	")
    124         if tmp[2] == "otu_node":
    125             break
    126 position = i
    127 for i in range(position,len(G)):
    128     for j in range(position,len(G)):
    129         H.add_edge(i,j,weight=0)
    130         for k in range(position):
    131             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
    132                 H.edge[i][j]['weight'] += 1
    133         if H.edge[i][j]['weight'] == 0:
    134             H.remove_edge(i,j)
    135 print len(H)
    136 print len(H.edges())
    137 print H.edges()
    138 
    139 minx = 32767
    140 for i in range(position,len(G)):
    141     for j in range(position,len(G)):
    142         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    143             minx = min(minx, H.edge[i][j]['weight'])
    144 
    145 for i in range(position):
    146     for j in range(position):
    147         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    148             H.edge[i][j]['weight'] -= minx
    149             if H.edge[i][j]['weight'] <=0:
    150                 H.remove_edge(i,j)
    151 """
    152 
    153 """3计算属性表,分本3"""
    154 
    155 #节点度中心系数,表示节点在图中的重要性
    156 Degree_Centrality = networkx.degree_centrality(G)
    157 #节点距离中心系数,值越大表示到其他节点距离越近,中心性越高
    158 Closeness_Centrality = networkx.closeness_centrality(G)
    159 #节点介数中心系数,值越大表示通过该节点的最短路径越多,中心性越高
    160 Betweenness_Centrality = networkx.betweenness_centrality(G)
    161 with open(os.path.join(args["output"], "network_centrality.txt"), "w") as tmp_file:
    162     tmp_file.write("Node_ID	Node_Name	Degree_Centrality	")
    163     tmp_file.write("Closeness_Centrality	Betweenness_Centrality
    ")
    164     for i in range(1, len(G)+1):
    165         tmp_file.write(str(i)+"	"+node_name[i]+"	")
    166         tmp_file.write(str(Degree_Centrality[i])+"	")
    167         tmp_file.write(str(Closeness_Centrality[i])+"	")
    168         tmp_file.write(str(Betweenness_Centrality[i])+"
    ")
    169 
    170 
    171 #网络传递性,二分图中应该为0,否则有问题
    172 Transitivity = networkx.transitivity(G)
    173 #网络直径
    174 Diameter = networkx.diameter(G)
    175 #网络平均最短路长度
    176 Average_shortest_path = networkx.average_shortest_path_length(G)
    177 with open(os.path.join(args["output"], "network_attributes.txt"), "w") as tmp_file:
    178     tmp_file.write("Transitivity:"+str(Transitivity)+"
    ")
    179     tmp_file.write("Diameter:"+str(Diameter)+"
    ")
    180     tmp_file.write("Average_shortest_path_length:"+str(Average_shortest_path)+"
    ")
    View Code
  • 相关阅读:
    网页定位导航
    position元素的定位
    节点属性
    css控制换行,断词
    css隐藏多余文字显示...
    重绘和回流
    CSS属性书写顺序
    模拟select
    常用html标签
    clientHeight、scrollHeight和offsetHeight基本用法
  • 原文地址:https://www.cnblogs.com/neverforget/p/5784957.html
Copyright © 2020-2023  润新知