## Demo: `if (TRUE)` always executes its body, so the download and save run.
## TRUE is spelled out because `T` is an ordinary variable that can be
## reassigned, whereas TRUE is a reserved word.
if (TRUE) {
  ## Fetch the GSE42872 series matrix into the working directory; with
  ## getGPL = FALSE the platform (GPL) record is not fetched.
  gset <- getGEO("GSE42872", destdir = ".", AnnotGPL = FALSE, getGPL = FALSE)
  ## Cache the downloaded object on disk.
  save(gset, file = "GSE42872_eSet.Rdata")
}
##也就是说,当文件f不存在时,if(!file.exists(f))等同于if(T):file.exists(f)返回FALSE,前面加上!表示否定,即为TRUE,于是执行后面{}中的内容;如果文件f已经存在,!file.exists(f)为FALSE,{}中的内容会被跳过。
## Demo: `if (FALSE)` is valid R, not a syntax error. The condition is FALSE,
## so the body is skipped and running this produces no visible effect.
## FALSE is spelled out because `F` is an ordinary variable that can be
## reassigned, whereas FALSE is a reserved word.
if (FALSE) {
  gset <- getGEO("GSE42872", destdir = ".", AnnotGPL = FALSE, getGPL = FALSE)
  save(gset, file = "GSE42872_eSet.Rdata")
}
> dir()
[1] "GSE42872_eSet.Rdata" "GSE42872_series_matrix.txt.gz"
> f = "a.txt"
> file.exists(f)
[1] FALSE
> dir()
[1] "a.txt" "GSE42872_eSet.Rdata"
[3] "GSE42872_series_matrix.txt.gz"
> file.exists(f)
[1] TRUE
##从GEO数据库导入GSE文件,得到的是一个list,其第一个元素才是ExpressionSet对象,元素名就是该GSE号对应的压缩文件名
> gset <- getGEO("GSE42872",destdir = ".",AnnotGPL = F,getGPL = F) ##注释文件和平台文件都不要
Found 1 file(s)
GSE42872_series_matrix.txt.gz
试开URL'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE42nnn/GSE42872/matrix/GSE42872_series_matrix.txt.gz'
Content type 'application/x-gzip' length 768865 bytes (750 KB)
downloaded 750 KB
Parsed with column specification:
cols(
ID_REF = col_double(),
GSM1052615 = col_double(),
GSM1052616 = col_double(),
GSM1052617 = col_double(),
GSM1052618 = col_double(),
GSM1052619 = col_double(),
GSM1052620 = col_double()
)
> class(gset)
[1] "list"
> length(gset)
[1] 1
> class(gset[[1]])
[1] "ExpressionSet"
attr(,"package")
[1] "Biobase"
> gset
$GSE42872_series_matrix.txt.gz
ExpressionSet (storageMode: lockedEnvironment)
assayData: 33297 features, 6 samples
element names: exprs
protocolData: none
phenoData
sampleNames: GSM1052615 GSM1052616 ... GSM1052620 (6 total)
varLabels: title geo_accession ... cell type:ch1 (34 total)
varMetadata: labelDescription
featureData: none
experimentData: use 'experimentData(object)'
pubMedIds: 24469106
Annotation: GPL6244
##如果已经下载好了GSE号对应的series matrix压缩文件(这是提交者处理过的表达矩阵,并不是最原始的raw data),getGEO()可以直接读取它,得到的直接就是上述的gset[[1]],也就是那个ExpressionSet对象。
> a = getGEO(file = "GSE42872_series_matrix.txt.gz",AnnotGPL = F,getGPL = F)
Parsed with column specification:
cols(
ID_REF = col_double(),
GSM1052615 = col_double(),
GSM1052616 = col_double(),
GSM1052617 = col_double(),
GSM1052618 = col_double(),
GSM1052619 = col_double(),
GSM1052620 = col_double()
)
|=====================================================================================| 100% 1 MB ##1M?好像就是解压缩了一样。。。
> class(a)
[1] "ExpressionSet"
attr(,"package")
[1] "Biobase"
> length(a)
[1] 1
> a
ExpressionSet (storageMode: lockedEnvironment)
assayData: 33297 features, 6 samples
element names: exprs
protocolData: none
phenoData
sampleNames: GSM1052615 GSM1052616 ... GSM1052620 (6 total)
varLabels: title geo_accession ... cell type:ch1 (34 total)
varMetadata: labelDescription
featureData: none
experimentData: use 'experimentData(object)'
pubMedIds: 24469106
Annotation: GPL6244
> b = getGEO(file = "GSE42872_series_matrix.txt.gz") ##如果不加参数AnnotGPL及getGPL,会额外下载GPL6244.soft这个文件,它是平台GPL6244的SOFT格式文件(即芯片平台的注释文件)
Parsed with column specification:
cols(
ID_REF = col_double(),
GSM1052615 = col_double(),
GSM1052616 = col_double(),
GSM1052617 = col_double(),
GSM1052618 = col_double(),
GSM1052619 = col_double(),
GSM1052620 = col_double()
)
File stored at:
C:\Users\HWC\AppData\Local\Temp\RtmpGqMiOr/GPL6244.soft
|=====================================================================================| 100% 96 MB
##下不下GPL6244.soft这个文件是由getGPL这个参数决定的(默认为TRUE,即下载),而不是AnnotGPL(默认为FALSE):下面设getGPL = F时没有下载,而只设AnnotGPL = F时仍然读取了GPL6244.soft
> c = getGEO(file = "GSE42872_series_matrix.txt.gz",getGPL = F)
Parsed with column specification:
cols(
ID_REF = col_double(),
GSM1052615 = col_double(),
GSM1052616 = col_double(),
GSM1052617 = col_double(),
GSM1052618 = col_double(),
GSM1052619 = col_double(),
GSM1052620 = col_double()
)
> class(c)
[1] "ExpressionSet"
attr(,"package")
[1] "Biobase"
> d = getGEO(file = "GSE42872_series_matrix.txt.gz",AnnotGPL = F)
Parsed with column specification:
cols(
ID_REF = col_double(),
GSM1052615 = col_double(),
GSM1052616 = col_double(),
GSM1052617 = col_double(),
GSM1052618 = col_double(),
GSM1052619 = col_double(),
GSM1052620 = col_double()
)
Using locally cached version of GPL6244 found here:
C:\Users\HWC\AppData\Local\Temp\RtmpGqMiOr/GPL6244.soft
|=====================================================================================| 100% 96 MB
> ls()
[1] "a" "b" "c" "d" "gset"
> rm(c(b,c))
Error in rm(c(b, c)) : ...要么含名字,要么是字符串 ##rm()的...里只能直接写对象名或带引号的字符串,不能写c()这样的调用(即使写成c("b","c")也会报同样的错);要用字符向量指定时须写成rm(list = c("b", "c"))
> rm(b,c) ##还以为要用c(),删除多个,直接逗号隔开列出即可
> ls()
[1] "a" "d" "gset"
> history(20) ##history()查看历史命令