• R语言:将没有空格的基因型列数据(如 GG)拆分为以空格分隔的两个等位基因(G G)


    1、

    dir()
    ## Read the PED genotype test data. Every column is forced to character so
    ## that codes made only of digits (e.g. "00" for a missing genotype) are not
    ## converted to numbers, which would break the string handling below.
    dat <- read.table("test.ped", colClasses = "character")
    dat
    ## One list element per genotype column (unnamed, so it prints as [[i]]).
    genoList <- unname(as.list(dat))
    genoList

    length(genoList)
    ## First allele: the first character of every genotype. substr() works cell
    ## by cell, so a genotype that is not exactly two characters long cannot
    ## shift the alleles of the rows that follow it.
    a1 <- lapply(genoList, function(x) substr(x, 1, 1))
    a1
    ## Second allele: the second character of every genotype.
    a2 <- lapply(genoList, function(x) substr(x, 2, 2))
    a2
    ## Turn each allele list back into a data frame shaped like `dat`.
    a1 <- as.data.frame(matrix(unlist(a1), byrow = FALSE, ncol = ncol(dat)))
    a1
    a2 <- as.data.frame(matrix(unlist(a2), byrow = FALSE, ncol = ncol(dat)))
    a2

    ## Interleave the alleles column by column: a1[1], a2[1], a1[2], a2[2], ...
    ## The list is preallocated and seq_len() keeps the loop from running when
    ## there are no columns.
    temp_list <- vector("list", 2 * ncol(dat))

    for (i in seq_len(ncol(dat))) {
      temp_list[[i * 2 - 1]] <- a1[[i]]
      temp_list[[i * 2]] <- a2[[i]]
    }
    temp_list

    ## Final result: twice as many columns as `dat`, one allele per column.
    result <- as.data.frame(
      matrix(unlist(temp_list), byrow = FALSE, ncol = 2 * ncol(dat))
    )
    result
    dat
    > dir()
    [1] "test.ped"
    > dat <- read.table("test.ped")   ## 测试数据
    > dat
      V1 V2 V3 V4 V5 V6
    1 GG CC GG GG GA AA
    2 TT GC CC GG GG AA
    3 TT GC CG GG GG TT
    4 GG GC GG GG GG AA
    > genoList =list()
    > for ( i in 1:ncol(dat) ) {     ## 保存为新列表
    +   genoList[[i]]<- dat[,i]
    + }
    > genoList
    [[1]]
    [1] "GG" "TT" "TT" "GG"
    
    [[2]]
    [1] "CC" "GC" "GC" "GC"
    
    [[3]]
    [1] "GG" "CC" "CG" "GG"
    
    [[4]]
    [1] "GG" "GG" "GG" "GG"
    
    [[5]]
    [1] "GA" "GG" "GG" "GG"
    
    [[6]]
    [1] "AA" "AA" "TT" "AA"
    
    > length(genoList)
    [1] 6
    > a1 <- lapply(genoList,function(x){unlist(strsplit(x,""))[seq(1, 2 * dim(dat)[1], 2)]})   ## 拆分第一个等位基因
    > a1
    [[1]]
    [1] "G" "T" "T" "G"
    
    [[2]]
    [1] "C" "G" "G" "G"
    
    [[3]]
    [1] "G" "C" "C" "G"
    
    [[4]]
    [1] "G" "G" "G" "G"
    
    [[5]]
    [1] "G" "G" "G" "G"
    
    [[6]]
    [1] "A" "A" "T" "A"
    
    > a2 <- lapply(genoList,function(x){unlist(strsplit(x,""))[seq(2, 2 * dim(dat)[1], 2)]})  ## 拆分第二个等位基因
    > a2
    [[1]]
    [1] "G" "T" "T" "G"
    
    [[2]]
    [1] "C" "C" "C" "C"
    
    [[3]]
    [1] "G" "C" "G" "G"
    
    [[4]]
    [1] "G" "G" "G" "G"
    
    [[5]]
    [1] "A" "G" "G" "G"
    
    [[6]]
    [1] "A" "A" "T" "A"
    
    > a1 <- as.data.frame(matrix(unlist(a1), byrow = F, ncol = ncol(dat)))  ## 转换为数据框
    > a1
      V1 V2 V3 V4 V5 V6
    1  G  C  G  G  G  A
    2  T  G  C  G  G  A
    3  T  G  C  G  G  T
    4  G  G  G  G  G  A
    > a2 <- as.data.frame(matrix(unlist(a2), byrow = F, ncol = ncol(dat)))  ## 转换为数据框
    > a2
      V1 V2 V3 V4 V5 V6
    1  G  C  G  G  A  A
    2  T  C  C  G  G  A
    3  T  C  G  G  G  T
    4  G  C  G  G  G  A
    > temp_list <- list()
    > for (i in 1:ncol(dat)) {          ## 合并在新列表中
    +   temp_list[[i * 2 - 1]] = a1[,i]
    +   temp_list[[i * 2]] = a2[,i]
    + }
    > temp_list
    [[1]]
    [1] "G" "T" "T" "G"
    
    [[2]]
    [1] "G" "T" "T" "G"
    
    [[3]]
    [1] "C" "G" "G" "G"
    
    [[4]]
    [1] "C" "C" "C" "C"
    
    [[5]]
    [1] "G" "C" "C" "G"
    
    [[6]]
    [1] "G" "C" "G" "G"
    
    [[7]]
    [1] "G" "G" "G" "G"
    
    [[8]]
    [1] "G" "G" "G" "G"
    
    [[9]]
    [1] "G" "G" "G" "G"
    
    [[10]]
    [1] "A" "G" "G" "G"
    
    [[11]]
    [1] "A" "A" "T" "A"
    
    [[12]]
    [1] "A" "A" "T" "A"
    
    > result <- as.data.frame(matrix(unlist(temp_list), byrow = F, ncol = 2 * ncol(dat)))   ## 转换为数据框
    > result   ## 查看结果
      V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
    1  G  G  C  C  G  G  G  G  G   A   A   A
    2  T  T  G  C  C  C  G  G  G   G   A   A
    3  T  T  G  C  C  G  G  G  G   G   T   T
    4  G  G  G  C  G  G  G  G  G   G   A   A
    > dat
      V1 V2 V3 V4 V5 V6
    1 GG CC GG GG GA AA
    2 TT GC CC GG GG AA
    3 TT GC CG GG GG TT
    4 GG GC GG GG GG AA

    参考:https://zhuanlan.zhihu.com/p/378405836

    2、shell实现

    root@PC1:/home/test# ls
    test.ped
    root@PC1:/home/test# cat test.ped   ## 测试数据
    GG CC GG GG GA AA
    TT GC CC GG GG AA
    TT GC CG GG GG TT
    GG GC GG GG GG AA
    root@PC1:/home/test# sed 's/. / &/g' test.ped   ## 使用sed将“字符+空格”替换为“空格+字符+空格”
    G G C C G G G G G A AA
    T T G C C C G G G G AA
    T T G C C G G G G G TT
    G G G C G G G G G G AA
    root@PC1:/home/test# sed 's/. / &/g' test.ped | sed 's/.$/ &/'  ## 将最后一个字符替换为空格字符
    G G C C G G G G G A A A
    T T G C C C G G G G A A
    T T G C C G G G G G T T
    G G G C G G G G G G A A
  • 相关阅读:
    事务和锁
    Spring AOP @before@after@around@afterreturning@afterthrowing执行顺序
    免安装绿色版本tomcat的问题
    Myeclipse代码提示及如何设置自动提示
    the field DBMS must be defined
    zip4j 2.0压缩 加密压缩
    HttpClient4.x 上传文件
    转发小程序
    【Maven】使用Maven构建多模块项目
    微信小程序官方示例 官方weui-wxss下载于安装 详解
  • 原文地址:https://www.cnblogs.com/liujiaxin2018/p/15709097.html
Copyright © 2020-2023  润新知