• vcf2maf


    1. install VEP

    (1) prerequisite

    su

    apt-get update

    apt-get upgrade

    apt-get install -y perl

    #perl packages install

    cpanm DBI

    cpanm Archive::Zip

    cpanm DBD::mysql  # (no DBD::mysql in conda)

    #C compiler not found

    apt-get install build-essential

    conda update --all -c conda/label/cf201901

    conda update --all -c conda-forge/label/cf201901

    #xlocale.h not found on Ubuntu while installing

    ln -s /usr/include/locale.h /usr/include/xlocale.h

    #conda install zlib, htslib, samtools, liftover (export conda PATH in .bashrc)

    conda install -c bioconda/label/cf201901 ucsc-liftover

     (2) download VEP

    apt-get install -y build-essential git libncurses-dev

    mkdir .vep

    export VEP_PATH=$HOME/vep

    export VEP_DATA=$HOME/.vep

    export VER=96

    #download VEP version96

    curl -L -O https://github.com/Ensembl/ensembl-vep/archive/release/96.zip

    unzip 96.zip; rm 96.zip; mv ensembl-vep-release-96 $VEP_PATH

    export PERL5LIB=$VEP_PATH:$PERL5LIB    # htslib and tabix must be in the same folder so that convert_cache.pl can work

    cd $VEP_PATH

    #download cache file

     #don't use rsync: it is too slow and keeps erroring out

    cd $VEP_DATA

    curl -O ftp://ftp.ensembl.org/pub/release-96/variation/vep/homo_sapiens_vep_96_GRCh37.tar.gz

    tar -izxf homo_sapiens_vep_96_GRCh37.tar.gz -C $VEP_DATA

    #download API (INSTALL.pl and convert_cache.pl live in $VEP_PATH, so switch back there first)

    cd $VEP_PATH

    perl INSTALL.pl --AUTO a --DESTDIR $VEP_PATH --CACHEDIR $VEP_DATA --NO_HTSLIB

    #download reference FASTA

    perl INSTALL.pl --AUTO f --SPECIES homo_sapiens --ASSEMBLY GRCh37 --DESTDIR $VEP_PATH --CACHEDIR $VEP_DATA

    #convert cache

    perl convert_cache.pl --species homo_sapiens --version $VER\_GRCh37 --dir $VEP_DATA

    #Download the ExAC r0.3.1 VCF

    cd $VEP_DATA

    curl -L ftp://ftp.broadinstitute.org:/pub/ExAC_release/release0.3.1/subsets/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz > $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz

    echo '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' > header_line.tmp
    curl -LO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed
    bcftools annotate --header-lines header_line.tmp --remove FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz | bcftools filter --targets-file ^known_somatic_sites.bed --output-type z --output $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz

    mv -f $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz
    tabix -p vcf $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz

    ./vep --species homo_sapiens --assembly GRCh37 --offline --no_progress --no_stats --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --pubmed --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --vcf --minimal --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --dir $VEP_DATA --fasta $VEP_DATA/homo_sapiens/$VER\_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz --input_file examples/homo_sapiens_GRCh37.vcf --output_file examples/homo_sapiens_GRCh37.vep.vcf --polyphen b --af --af_1kg --af_esp --regulatory
    error:

    2. install vcf2maf

    export VCF2MAF_URL=`curl -sL https://api.github.com/repos/mskcc/vcf2maf/releases | grep -m1 tarball_url | cut -d\" -f4`

    curl -L -o mskcc-vcf2maf.tar.gz $VCF2MAF_URL; tar -zxf mskcc-vcf2maf.tar.gz; cd mskcc-vcf2maf-*

    perl vcf2maf.pl --man

     

    ---恢复内容结束---

    echo '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' > header_line.tmp
    curl -LO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed
    bcftools annotate --header-lines header_line.tmp --remove FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz | bcftools filter --targets-file ^known_somatic_sites.bed --output-type z --output $VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz

  • 相关阅读:
    获取本机IP,用户代理
    10 种机器学习算法的要点(附 Python)(转载)
    怎么查找执行比较慢的sql语句-DBA给的建议
    .net 调度器怎么实现心跳(socket除了他,没选择吧)
    分布式多计算机调度平台
    续【C# 以管理员方式启动Winform,进而使用管理员控制Windows Service】
    C# 以管理员方式启动Winform,进而使用管理员控制Windows Service
    SqlServer2008根据现有表,获取该表的分区创建脚本
    SqlServer常用命令
    创建分区表过程
  • 原文地址:https://www.cnblogs.com/xiaoxiaoxiaoxue/p/10877636.html
Copyright © 2020-2023  润新知