• Gviz


    1) Introduction

    为了理解基因组数据,通常旨在在基因组浏览器中绘制这样的数据,以及各种基因组注释特征,例如基因或转录物模型,CpG岛,重复区域等。这些功能可以从ENSEMBL或UCSC等公共数据库中提取,也可以在内部生成或策划。许多当前可用的基因组浏览器在显示基因组注释数据方面做了合理的工作,并且存在从R内部连接到其中一些的选项(例如,使用rtracklayer包)。然而,这些解决方案都没有提供完整的Rgraphics系统以多种不同方式显示大数值数据的灵活性。 Gviz软件包旨在通过提供结构化可视化框架来绘制基因组坐标上的任何类型的数据来缩小这一差距。它松散地基于GenomeGraphs包,但为了提高性能和灵活性,重新构建了完整的类层次结构以及所有绘图方法,所有绘图都是使用网格图形系统完成的,并且几个专门的注释类允许集成来自UCSC或ENSEMBL等来源的公开可用的基因组注释数据。

    2)Basic Features

    if("Gviz" %in% rownames(installed.packages()) == FALSE) {source("http://bioconductor.org/biocLite.R");biocLite("Gviz")}
    suppressMessages(library(Gviz))
    ls('package:Gviz')
    library(GenomicRanges)
    data(cpgIslands)     #以GC岛数据集为例
    class(cpgIslands)
    chr <- as.character(unique(seqnames(cpgIslands)))
    gen <- genome(cpgIslands)
    atrack <- AnnotationTrack(cpgIslands, name = "CpG")
    plotTracks(atrack)
    

     

    gtrack <- GenomeAxisTrack()
    plotTracks(list(gtrack, atrack))
    

     

    itrack <- IdeogramTrack(genome = gen, chromosome = chr)
    plotTracks(list(itrack, gtrack, atrack))
    

     

    由基本特征可以知道,GVIZ和ggplot2 好像 道理是一样的,一层累积一层添加元素。我们再以下面数据集为例:

    data(geneModels)
    grtrack <- GeneRegionTrack(geneModels, genome = gen,chromosome = chr, name = "Gene Model")
    plotTracks(list(itrack, gtrack, atrack, grtrack))
    

     

    plotTracks(list(itrack, gtrack, atrack, grtrack),from = 26700000, to = 26750000)
    

     

    plotTracks(list(itrack, gtrack, atrack, grtrack),extend.left = 0.5, extend.right = 1e+06)
    

     

    plotTracks(list(itrack, gtrack, atrack, grtrack),
               extend.left = 0.5, extend.right = 1e+06, col = NULL)
    

     

    library(BSgenome.Hsapiens.UCSC.hg19)
    strack <- SequenceTrack(Hsapiens, chromosome = chr)
    plotTracks(list(itrack, gtrack, atrack, grtrack,
                    strack), from = 26591822, to = 26591852, cex = 0.8)
    

     

    set.seed(255)
    lim <- c(26700000, 26750000)
    coords <- sort(c(lim[1], sample(seq(from = lim[1],to = lim[2]), 99), lim[2]))
    dat <- runif(100, min = -10, max = 10)
    dtrack <- DataTrack(data = dat, start = coords[-length(coords)],
                        end = coords[-1], chromosome = chr, genome = gen,
                        name = "Uniform")
    plotTracks(list(itrack, gtrack, atrack, grtrack,
                     dtrack), from = lim[1], to = lim[2])
    

    plotTracks(list(itrack, gtrack, atrack, grtrack,
                    dtrack), from = lim[1], to = lim[2], type = "histogram")
    

     

    3)Plotting parameters(画图参数调整)

    3.1 设置参数

    grtrack <- GeneRegionTrack(geneModels, genome = gen,
                               chromosome = chr, name = "Gene Model", transcriptAnnotation = "symbol",
                               background.title = "brown")
    head(displayPars(grtrack))
    plotTracks(list(itrack, gtrack, atrack, grtrack))
    

     

    plotTracks(list(itrack, gtrack, atrack, grtrack),
               background.panel = "#FFFEDB", background.title = "darkblue")
    

     

    3.2 Schemes(主题)

    getOption("Gviz.scheme")
    scheme <- getScheme()
    scheme$GeneRegionTrack$fill <- "salmon"
    scheme$GeneRegionTrack$col <- NULL
    scheme$GeneRegionTrack$transcriptAnnotation <- "transcript"
    addScheme(scheme, "myScheme")
    options(Gviz.scheme = "myScheme")
    grtrack <- GeneRegionTrack(geneModels, genome = gen,
                                chromosome = chr, name = "Gene Model")
    plotTracks(grtrack)
    options(Gviz.scheme = "default")
    grtrack <- GeneRegionTrack(geneModels, genome = gen,
                               chromosome = chr, name = "Gene Model", transcriptAnnotation = "symbol")
    

    3.3 Plotting direction (方向)

    plotTracks(list(itrack, gtrack, atrack, grtrack),reverseStrand = TRUE)
    

     

    4) Track classes

     4.1 GenomeAxisTrack

    axisTrack <- GenomeAxisTrack()
    plotTracks(axisTrack, from = 1e+06, to = 9e+06)
    

     

    axisTrack <- GenomeAxisTrack(range = IRanges(start = c(2e+06, 4e+06), end = c(3e+06, 7e+06), names = rep("N-stretch",+ 2)))
    plotTracks(axisTrack, from = 1e+06, to = 9e+06)
    

     

     plotTracks(axisTrack, from = 1e+06, to = 9e+06, showId = TRUE)
    

     

    plotTracks(axisTrack, from = 1e+06, to = 9e+06, add53 = TRUE,add35 = TRUE)
    

     

    plotTracks(axisTrack, from = 1e+06, to = 9e+06, add53 = TRUE,
               add35 = TRUE, littleTicks = TRUE)
    

     

    plotTracks(axisTrack, from = 1e+06, to = 9e+06, exponent = 4)
    

     

    plotTracks(axisTrack, from = 1e+06, to = 9e+06, labelPos = "below")
    

     

     plotTracks(axisTrack, from = 1e+06, to = 9e+06, scale = 0.5)
    

     

    plotTracks(axisTrack, from = 1e+06, to = 9e+06, scale = 0.5,
               labelPos = "below")
    

     

    4.2) IdeogramTrack

    ideoTrack <- IdeogramTrack(genome = "hg19", chromosome = "chrX")
    plotTracks(ideoTrack, from = 8.5e+07, to = 1.29e+08)
    

     

    plotTracks(ideoTrack, from = 8.5e+07, to = 1.29e+08,showId = FALSE)
    

     

    plotTracks(ideoTrack, from = 8.5e+07, to = 1.29e+08,
               showId = FALSE, showBandId = TRUE, cex.bands = 0.5)
    

     

    4.3 DataTrack

    data(twoGroups)
    dTrack <- DataTrack(twoGroups, name = "uniform")
    plotTracks(dTrack)
    

     

    plotTracks(dTrack, type = c("boxplot", "a", "g"))
    

     

    总之,选择不同的画图参数,生成不同的图形
    

     

    Data Grouping(对于分组数据)

    plotTracks(dTrack, groups = rep(c("control", "treated"),
                                    each = 3), type = c("a", "p", "confint"))
    

     

    plotTracks(dTrack, groups = rep(c("control", "treated"),
                                    each = 3), type = c("a", "p"), legend = TRUE)
    

     

    data(dtHoriz)
    dtHoriz <- dtHoriz[1:6, ]
    plotTracks(dtHoriz, type = "horiz", groups = rownames(values(dtHoriz)),
               showSampleNames = TRUE, cex.sampleNames = 0.6,
               separator = 1)
    

     

    总之,选择不同的图形参数类型,生成不同的图形类别
    

     

    从数据文件中构建 DataTrack objects

    bgFile <- system.file("extdata/test.bedGraph", package = "Gviz")
    dTrack2 <- DataTrack(range = bgFile, genome = "hg19",
                           type = "l", chromosome = "chr19", name = "bedGraph")
    plotTracks(dTrack2)
    

     

    bamFile <- system.file("extdata/test.bam", package = "Gviz")
    dTrack4 <- DataTrack(range = bamFile, genome = "hg19", type = "l", name = "Coverage", window = -1, chromosome = "chr1")
    plotTracks(dTrack4, from = 189990000, to = 1.9e+08)
    

     

    plotTracks(dTrack4, chromosome = "chr1", from = 189891483,to = 190087517)
    

     

    Data transformations

    dat <- sin(seq(pi, 10 * pi, len = 500))
    dTrack.big <- DataTrack(start = seq(1, 1e+05, len = 500),
                             width = 15, chromosome = "chrX", genome = "hg19",
                             name = "sinus", data = sin(seq(pi, 5 * pi, len = 500)) *
                            runif(500, 0.5, 1.5))
    plotTracks(dTrack.big, type = "hist")
    

     

    plotTracks(dTrack.big, type = "hist", window = 50)
    

     

    plotTracks(dTrack.big, type = "hist", window = -1,windowSize = 2500)
    

     

    plotTracks(dTrack, groups = rep(c("control", "treated"),
                                     each = 3), type = c("b"), aggregateGroups = TRUE)
    

     

    4.4 AnnotationTrack

    aTrack <- AnnotationTrack(start = c(10, 40, 120),
                              width = 15, chromosome = "chrX", strand = c("+","*", "-"),
                              id = c("Huey", "Dewey", "Louie"),
                              genome = "hg19", name = "foo")
    plotTracks(aTrack)
    

     

    plotTracks(aTrack, shape = "box", featureAnnotation = "id")
    

     

    plotTracks(aTrack, shape = "ellipse", featureAnnotation = "id", fontcolor.feature = "darkblue")
    

     

    aTrack.groups <- AnnotationTrack(start = c(50, 180,260, 460, 860, 1240), 
                                     width = c(15, 20, 40, 100,00, 20), 
                                     chromosome = "chrX",
                                    strand = rep(c("+", "*", "-"), c(1, 3, 2)), 
                                    group = rep(c("Huey", "Dewey", "Louie"), c(1, 3, 2)), 
                                    genome = "hg19",
                                    name = "foo")                                                                                                                                                                   + "Dewey", "Louie"), c(1, 3, 2)), genome = "hg19",name = "foo")
    plotTracks(aTrack.groups, groupAnnotation = "group")
    

     

    plotTracks(aTrack.groups, groupAnnotation = "group",just.group = "above")
    

     

    aTrack.stacked <- AnnotationTrack(start = c(50, 180,260, 800, 600, 1240), width = c(15, 20, 40, 100,
                                       500, 20), chromosome = "chrX", strand = "*",
                                      group = rep(c("Huey", "Dewey", "Louie"), c(1, 3, 2)), 
                                      genome = "hg19", name = "foo")
    plotTracks(aTrack.stacked, groupAnnotation = "group")
    

     

    plotTracks(aTrack.stacked, stacking = "dense")
    

     

    data("denseAnnTrack")
    plotTracks(denseAnnTrack, showOverplotting = TRUE)
    

     

    4.5,4.6,4.7,4.8,4.9回来再写

    4.9 AlignmentsTrack

    afrom <- 2960000
    ato <- 3160000
    alTrack <- AlignmentsTrack(system.file(package = "Gviz", "extdata", "gapped.bam"), isPaired = TRUE)
    bmt <- BiomartGeneRegionTrack(genome = "hg19", chromosome = "chr12",start = afrom, end = ato, filter = list(with_ox_refseq_mrna = TRUE),stacking = "dense")
    plotTracks(c(bmt, alTrack), from = afrom, to = ato, chromosome = "chr12")
    

     

    plotTracks(c(bmt, alTrack), from = afrom, to = ato,
               chromosome = "chr12", min.height = 0, coverageHeight = 0.08,
               minCoverageHeight = 0)
    

     

    plotTracks(c(alTrack, bmt), from = afrom, to = ato, chromosome = "chr12", type = "coverage")
    

     

    plotTracks(c(bmt, alTrack), from = afrom + 12700,to = afrom + 15200, chromosome = "chr12")
    

     

    plotTracks(c(bmt, alTrack), from = afrom + 12700,
              to = afrom + 15200, chromosome = "chr12", type = c("coverage","sashimi"))
    

     

    introns <- GRanges("chr12", IRanges(start = c(2973662,2973919), end = c(2973848, 2974520)))
    plotTracks(c(bmt, alTrack), from = afrom + 12700,
               to = afrom + 15200, chromosome = "chr12", type = c("coverage","sashimi"), sashimiFilter = introns)
    

     

    plotTracks(c(bmt, alTrack), from = afrom + 12700,to = afrom + 15200, chromosome = "chr12", 
               type = c("coverage", "sashimi"), sashimiFilter = introns, sashimiFilterTolerance = 5L)
    

     

    plotTracks(c(bmt, alTrack), from = afrom + 12700,
               to = afrom + 15200, chromosome = "chr12", reverseStacking = TRUE,
                col.mates = "purple", col.gap = "orange", type = "pileup")
    

     

    alTrack <- AlignmentsTrack(system.file(package = "Gviz","extdata", "gapped.bam"),isPaired = FALSE)
    plotTracks(c(bmt, alTrack), from = afrom + 12700,to = afrom + 15200, chromosome = "chr12")
    

     

    4.10 Creating tracks from UCSC data

    from <- 65921878
    to <- 65980988
    knownGenes <- UcscTrack(genome = "mm9", chromosome = "chrX",
                             track = "knownGene", from = from, to = to, trackType = "GeneRegionTrack",
                            rstarts = "exonStarts", rends = "exonEnds", gene = "name",
                            symbol = "name", transcript = "name", strand = "strand",
                             fill = "#8282d2", name = "UCSC Genes")
    refGenes <- UcscTrack(genome = "mm9", chromosome = "chrX",
                          track = "xenoRefGene", from = from, to = to,
                           trackType = "GeneRegionTrack", rstarts = "exonStarts",
                          rends = "exonEnds", gene = "name", symbol = "name2",
                           transcript = "name", strand = "strand", fill = "#8282d2",
                          stacking = "dense", name = "Other RefSeq")
    ensGenes <- UcscTrack(genome = "mm9", chromosome = "chrX",
                           track = "ensGene", from = from, to = to, trackType = "GeneRegionTrack",
                           rstarts = "exonStarts", rends = "exonEnds", gene = "name",
                          symbol = "name2", transcript = "name", strand = "strand",
                          fill = "#960000", name = "Ensembl Genes")
    cpgIslands <- UcscTrack(genome = "mm9", chromosome = "chrX",
                              track = "cpgIslandExt", from = from, to = to,
                              trackType = "AnnotationTrack", start = "chromStart",
                              end = "chromEnd", id = "name", shape = "box",
                              fill = "#006400", name = "CpG Islands")
    snpLocations <- UcscTrack(genome = "mm9", chromosome = "chrX",
                              track = "snp128", from = from, to = to, trackType = "AnnotationTrack",
                               start = "chromStart", end = "chromEnd", id = "name",
                              feature = "func", strand = "strand", shape = "box",
                              stacking = "dense", fill = "black", name = "SNPs")
    conservation <- UcscTrack(genome = "mm9", chromosome = "chrX",
                              track = "Conservation", table = "phyloP30wayPlacental",
                               from = from, to = to, trackType = "DataTrack",
                               start = "start", end = "end", data = "score",
                               type = "hist", window = "auto", col.histogram = "darkblue",
                               fill.histogram = "darkblue", ylim = c(-3.7, 4),
                               name = "Conservation")
    gcContent <- UcscTrack(genome = "mm9", chromosome = "chrX",
                            track = "GC Percent", table = "gc5Base", from = from,
                            to = to, trackType = "DataTrack", start = "start",
                            end = "end", data = "score", type = "hist", window = -1,
                            windowSize = 1500, fill.histogram = "black",
                            col.histogram = "black", ylim = c(30, 70), name = "GC Percent")
    axTrack <- GenomeAxisTrack()
    idxTrack <- IdeogramTrack(genome = "mm9", chromosome = "chrX")
    plotTracks(list(idxTrack, axTrack, knownGenes, refGenes,
                     ensGenes, cpgIslands, gcContent, conservation,
                     snpLocations), from = from, to = to, showTitle = FALSE)
    

     

  • 相关阅读:
    介绍几个程序员常去的网站
    你这辈子,为什么富不起来?!
    Bug解决:mysql 创建表字段Double类型长度
    RedisTemplate5种数据结构操作
    StringRedisTemplate与RedisTemplate区别
    Redis客户端信息的存取
    Anaconda安装pygame
    springboot启动报错
    idea上传项目到github
    Docker安装报错
  • 原文地址:https://www.cnblogs.com/djx571/p/9712104.html
Copyright © 2020-2023  润新知