import numpy
from biom.table import Table
============================================================================================================
# Example 1: build a Table with metadata and inspect it.
# 10x4 matrix holding the integers 0..39 (10 observations x 4 samples).
data = numpy.arange(40).reshape(10, 4)
sample_ids = ['S%d' % i for i in range(4)]
observ_ids = ['O%d' % i for i in range(10)]
# One metadata dict per sample, in the same order as sample_ids.
sample_metadata = [
    {'environment': 'A'},
    {'environment': 'B'},
    {'environment': 'A'},
    {'environment': 'B'},
]
# One metadata dict per observation, in the same order as observ_ids.
observ_metadata = [
    {'taxonomy': ['Bacteria', 'Firmicutes']},
    {'taxonomy': ['Bacteria', 'Firmicutes']},
    {'taxonomy': ['Bacteria', 'Proteobacteria']},
    {'taxonomy': ['Bacteria', 'Proteobacteria']},
    {'taxonomy': ['Bacteria', 'Proteobacteria']},
    {'taxonomy': ['Bacteria', 'Bacteroidetes']},
    {'taxonomy': ['Bacteria', 'Bacteroidetes']},
    {'taxonomy': ['Bacteria', 'Firmicutes']},
    {'taxonomy': ['Bacteria', 'Firmicutes']},
    {'taxonomy': ['Bacteria', 'Firmicutes']},
]
# Construct the table; the metadata is passed directly to the constructor.
table = Table(
    data,
    observ_ids,
    sample_ids,
    observ_metadata,
    sample_metadata,
    table_id='myTestTable',
)
# Alternative: use add_metadata. This ADDS metadata to an existing table,
# it does NOT construct one!
#table.add_metadata(sample_metadata, axis='sample')
# Print info of the table. The bare expression only displays something in an
# interactive session (REPL/notebook); in a script it has no effect.
table
print(table)
# Print column names (sample ids); the second call names the axis explicitly.
print(table.ids())
print(table.ids(axis='sample'))
# Print row names (observation ids).
print(table.ids(axis='observation'))
# Print the number of non-zero entries. Here it is 39, because exactly one of
# the 40 values (the 0) is zero.
print(table.nnz)
============================================================================================================
# Example 2: normalization.
# 2x2 table: rows are observations O1/O2, columns are samples S1/S2.
data = numpy.asarray([[2, 0], [6, 1]])
table = Table(data, ['O1', 'O2'], ['S1', 'S2'])
# Normalize by 'sample' (column).
new_table = table.norm(inplace=False)
# Normalize by row ('observation').
new_table = table.norm(axis='observation', inplace=False)
# With inplace=True, `table` itself would be changed too. With inplace=False
# (as above) it stays unchanged. Note that `table1 = table` does not copy
# anything: it only binds a second name to the same object, so modifying
# `table1` in place also modifies `table`.
============================================================================================================
# Example 3: filter with a function.
# NOTE: this needs a table whose samples carry 'environment' metadata, such as
# the 10x4 table built with sample_metadata. The 2x2 table from the
# normalization example has no metadata, so md['environment'] would fail there.
# `normed` was never defined in these notes; it is the sample-normalized table.
normed = table.norm(axis='sample', inplace=False)


def filter_f(values, id_, md):
    """Return True for samples whose 'environment' metadata is 'A'."""
    return md['environment'] == 'A'


# Samples for which filter_f returns True are kept; inplace=False returns a
# new table and leaves `normed` untouched.
env_a = normed.filter(filter_f, axis='sample', inplace=False)
============================================================================================================
# Example 4: divide the table by 'environment'.
# NOTE: this needs a table whose samples carry 'environment' metadata, such as
# the 10x4 table built with sample_metadata.
def part_f(id_, md):
    """Return the partition key of a sample: its 'environment' metadata."""
    return md['environment']


env_tables = table.partition(part_f, axis='sample')
# Make a sum: for every partition, print its key and the per-sample sums of
# the corresponding sub-table.
for partition, env_table in env_tables:
    print(partition, env_table.sum('sample'))
============================================================================================================
# add-metadata
============================================================================================================
# Shell commands (run in a terminal, not in Python).
# Export a .biom table to tab-separated text, including the 'taxonomy'
# metadata; --output-metadata-id presumably sets the name of that column in
# the output ("ConsensusLineage") - confirm against the biom convert docs.
biom convert -i table.biom -o table.from_biom_w_consensuslineage.txt --to-tsv --header-key taxonomy --output-metadata-id "ConsensusLineage"
# Workflow for editing a table by hand:
# 1. Convert .biom to .txt (TSV).
#    --header-key selects the metadata field ('taxonomy') to include.
biom convert -i otu_table.biom -o otu_table.txt --to-tsv --header-key taxonomy
# 2. Fix the text file by hand, e.g. in Excel.
# 3. Convert the edited text file back to .biom (HDF5), declaring it an
#    "OTU table" and processing the 'taxonomy' column as observation metadata.
biom convert -i otu_table.txt -o new_otu_table.biom --to-hdf5 --table-type="OTU table" --process-obs-metadata taxonomy
============================================================================================================
# Write a summary of INPUT.biom to OUTPUT.txt.
# NOTE(review): --qualitative presumably reports the number of distinct
# observations per sample instead of total counts - confirm against the
# biom summarize-table docs.
biom summarize-table -i INPUT.biom --qualitative -o OUTPUT.txt