这里以小鼠为例子下载相应的注释文件,基因组版本为mm10
- 下载refGene.txt.gz
网址:http://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/
wget http://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/refGene.txt.gz
- 下载gene_info.gz文件
网址:ftp://ftp.ncbi.nih.gov/gene/DATA/
wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz
- 使用写好的脚本对下载文件进行处理,小鼠的tax id为10090
less -S gene_info.gz | awk -F "\t" '{if($1=="10090") print $2"\t"$3"\t"$5}' >../mm10.ID
less -S refGene.txt.gz | awk -F "\t" '{print $2"\t"$13}' >mm10.name
perl gene.pl ../mm10.ID mm10.name mm10-id-gene
perl turn.pl mm10-id-gene refGene.txt.gz Gene.bed.2
less Gene.bed.2 |sort -k 1,1 -k 2,2n > Gene.bed
rm Gene.bed.2
perl Gene2exon_intron.pl refGene.txt.gz
sort -k 1,1 -k 2,2n exon.bed>Exon.bed
sort -k 1,1 -k 2,2n intron.bed>Intron.bed
rm exon.bed intron.bed
perl get4Intergenic.pl Gene.bed 2000
- GO and KEGG
less -S /ldfssz1/ST_BIGDATA/USER/yueyao/12.Pro/04.RNASeq/gene2go.gz |grep "^10090" >mm10.go
perl /ifs4/BC_PUB/biosoft/pipeline/RNA/RNA_RNAdenovo/RNA_RNAdenovo_2016a/Annotation/annot2goa.pl /ifs4/BC_PUB/biosoft/db/Pub/go/RNA/20171220/gene_ontology.1_2.obo mm10.annot /ldfssz1/ST_BIGDATA/USER/yueyao/16.Pipeline/chipseq_test/mmdatabase/GO/mm10
perl dealGOObo.pl -go /ifs4/BC_PUB/biosoft/db/Pub/go/RNA/20171220/gene_ontology.1_2.obo -prefix go
less /ifs4/BC_PUB/biosoft/db/Pub/kegg/RNA/84.0/animal.id.annot.xls |grep ^mmu >mm10.kegg
less mm10.kegg |perl -ne '$line=$_;$id=(split)[0];$gene=(split/:/,$id)[1];if($line=~/(K\d{4})/g){print "$gene\t$1\n"}' >mm10.ko
(注意:KEGG KO 编号一般为 K 加 5 位数字,如 K00001;请确认此处的 {4} 是否应为 {5}。)
- Genome索引
染色体大小
wget http://hgdownload.soe.ucsc.edu/goldenPath/mm10/bigZips/mm10.chrom.sizes