plink格式文件如何提取位点、删除位点、提取样本、删除样本

1、准备测试数据：10个样本，10个位点

[root@linuxprobe test]# ls
test.map  test.ped
[root@linuxprobe test]# cat test.ped  ## 10个样本，10行
DOR     sample01        0       0       0       -9      G G     C C     G G     G G     A G     A A     G G     A A     G G     A A
DOR     sample02        0       0       0       -9      G G     G C     G G     G G     G G     A A     G G     C A     G G     A A
DOR     sample03        0       0       0       -9      G G     C C     G G     G G     G G     A A     G G     A A     G G     A A
DOR     sample04        0       0       0       -9      G G     C C     G G     G G     G G     A A     G G     A A     A G     G A
DOR     sample05        0       0       0       -9      G G     C C     G G     G G     G G     G A     G G     C A     A G     G A
DOR     sample06        0       0       0       -9      G G     C C     G G     G G     G G     A A     G G     A A     A A     G G
DOR     sample07        0       0       0       -9      G G     C C     G G     A G     A A     G A     G G     C A     G G     A A
DOR     sample08        0       0       0       -9      G G     C C     G G     A G     A A     A A     G G     A A     A G     G A
DOR     sample09        0       0       0       -9      G G     G C     G G     G G     G G     A A     G G     C A     G G     A A
DOR     sample10        0       0       0       -9      G G     C C     G G     G G     A G     G A     G G     C C     A G     G A
[root@linuxprobe test]# cat test.map  ## 10个位点
1       snp1    0       55910
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1       snp5    0       312707
2       snp6    0       23713
2       snp7    0       24881
2       snp8    0       249725
2       snp9    0       287787
2       snp10   0       2407856

2、提取特定位点

[root@linuxprobe test]# cut -f 2 test.map | sed -n '2p;5p;7p' > testsite.txt
[root@linuxprobe test]# cat testsite.txt  ## 提取三个位点  
snp2
snp5
snp7
[root@linuxprobe test]# plink --file test --extract testsite.txt --recode tab --out testsite;rm *.log *.nosex  ## 利用plink --extract 选项

[root@linuxprobe test]# ls
test.map test.ped testsite.map testsite.ped testsite.txt
[root@linuxprobe test]# cat testsite.map ## 已经提取三个位点
1       snp2    0       85204
1       snp5    0       312707
2       snp7    0       24881
[root@linuxprobe test]# cat testsite.ped
DOR     sample01        0       0       0       -9      C C     A G     G G
DOR     sample02        0       0       0       -9      G C     G G     G G
DOR     sample03        0       0       0       -9      C C     G G     G G
DOR     sample04        0       0       0       -9      C C     G G     G G
DOR     sample05        0       0       0       -9      C C     G G     G G
DOR     sample06        0       0       0       -9      C C     G G     G G
DOR     sample07        0       0       0       -9      C C     A A     G G
DOR     sample08        0       0       0       -9      C C     A A     G G
DOR     sample09        0       0       0       -9      G C     G G     G G
DOR     sample10        0       0       0       -9      C C     A G     G G

## 提取特定位点只需要提供snpID即可，就是map文件对应的第二列

3、删除特定位点

[root@linuxprobe test]# rm  testsite.map  testsite.ped  ## 删除上一步测试结果
[root@linuxprobe test]# cat testsite.txt
snp2
snp5
snp7
[root@linuxprobe test]# ls
test.map  test.ped  testsite.txt
[root@linuxprobe test]# plink --file test --exclude testsite.txt --recode tab --out testsite;rm *.log *.nosex  ## 利用plink --exclude选项剔除位点
[root@linuxprobe test]# cat testsite.map ## 已经剔除snp2、snp5、snp7位点。
1       snp1    0       55910
1       snp3    0       122948
1       snp4    0       203750
2       snp6    0       23713
2       snp8    0       249725
2       snp9    0       287787
2       snp10   0       2407856
[root@linuxprobe test]# cat testsite.ped
DOR     sample01        0       0       0       -9      G G     G G     G G     A A     A A     G G     A A
DOR     sample02        0       0       0       -9      G G     G G     G G     A A     C A     G G     A A
DOR     sample03        0       0       0       -9      G G     G G     G G     A A     A A     G G     A A
DOR     sample04        0       0       0       -9      G G     G G     G G     A A     A A     A G     G A
DOR     sample05        0       0       0       -9      G G     G G     G G     G A     C A     A G     G A
DOR     sample06        0       0       0       -9      G G     G G     G G     A A     A A     A A     G G
DOR     sample07        0       0       0       -9      G G     G G     A G     G A     C A     G G     A A
DOR     sample08        0       0       0       -9      G G     G G     A G     A A     A A     A G     G A
DOR     sample09        0       0       0       -9      G G     G G     G G     A A     C A     G G     A A
DOR     sample10        0       0       0       -9      G G     G G     G G     G A     C C     A G     G A
## 剔除位点只需要提供snpID，就是map文件对应的第二列

4、提取特定样本数据

[root@linuxprobe test]# rm testsite.* ## 删除上一步测试结果
[root@linuxprobe test]# cut -f 1-2 test.ped | sed -n '2p;6p;9p' > testind.txt ## 提取测试3个样本
[root@linuxprobe test]# cat testind.txt  
DOR     sample02
DOR     sample06
DOR     sample09
[root@linuxprobe test]# plink --file test --keep testind.txt --recode tab --out testind; rm *.log *.nosex ## 利用 --keep选项提取 
[root@linuxprobe test]# ls
testind.map  testind.ped  testind.txt  test.map  test.ped
[root@linuxprobe test]# cat testind.map ## 位点没变
1       snp1    0       55910
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1       snp5    0       312707
2       snp6    0       23713
2       snp7    0       24881
2       snp8    0       249725
2       snp9    0       287787
2       snp10   0       2407856
[root@linuxprobe test]# cat testind.ped  ## 提取了三个样本
DOR     sample02        0       0       0       -9      G G     G C     G G     G G     G G     A A     G G     C A     G G     A A
DOR     sample06        0       0       0       -9      G G     C C     G G     G G     G G     A A     G G     A A     A A     G G
DOR     sample09        0       0       0       -9      G G     G C     G G     G G     G G     A A     G G     C A     G G     A A
## 提取样本需要提供familyID和individualID，就是ped文件对应的前两列

5、删除特定样本数据

[root@linuxprobe test]# rm testind.map  testind.ped ## 删除上一步测试结果
[root@linuxprobe test]# ls
testind.txt  test.map  test.ped
[root@linuxprobe test]# cat testind.txt 
DOR     sample02
DOR     sample06
DOR     sample09
[root@linuxprobe test]# plink --file test --remove testind.txt --recode tab --out testind; rm *.log *.nosex ## 利用 --remove选项删除样本
[root@linuxprobe test]# cat testind.map ## 位点不变
1       snp1    0       55910
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1       snp5    0       312707
2       snp6    0       23713
2       snp7    0       24881
2       snp8    0       249725
2       snp9    0       287787
2       snp10   0       2407856
[root@linuxprobe test]# cat testind.ped  ## 已经删除sample02、sample06、sample09样本
DOR     sample01        0       0       0       -9      G G     C C     G G     G G     A G     A A     G G     A A     G G     A A
DOR     sample03        0       0       0       -9      G G     C C     G G     G G     G G     A A     G G     A A     G G     A A
DOR     sample04        0       0       0       -9      G G     C C     G G     G G     G G     A A     G G     A A     A G     G A
DOR     sample05        0       0       0       -9      G G     C C     G G     G G     G G     G A     G G     C A     A G     G A
DOR     sample07        0       0       0       -9      G G     C C     G G     A G     A A     G A     G G     C A     G G     A A
DOR     sample08        0       0       0       -9      G G     C C     G G     A G     A A     A A     G G     A A     A G     G A
DOR     sample10        0       0       0       -9      G G     C C     G G     G G     A G     G A     G G     C C     A G     G A

## 删除样本需要提供familyID和individualID，就是ped文件对应的前两列

相关阅读:
WebSocket
Redis
Memcached
Python实现支付宝在线支付
 RabbitMQ
linux内核优化
 kafka资源
 推荐相关
 机器学习好网站
 逻辑回归(logistic regression)的本质——极大似然估计
原文地址：https://www.cnblogs.com/liujiaxin2018/p/13776086.html