SNP数据(txt)文件可以在此下载:http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/
下载并解压数据文件之后,启动MySQL server并登录MySQL,然后依次执行下面的语句:
# Build a local copy of the UCSC hg19 `snp146` table.
# Run from the mysql client; statements are executed in order:
#   1. create and select the database
#   2. (re)create the table
#   3. bulk-load the unpacked text dump

# 1. Create the database and make it the default for the statements below.
CREATE DATABASE IF NOT EXISTS snp146all;
USE snp146all;

# 2. Create the table. This DDL is the table script that can be downloaded
#    from the same site as the data file.
DROP TABLE IF EXISTS `snp146`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `snp146` (
  `bin` smallint(5) unsigned NOT NULL,
  `chrom` varchar(31) NOT NULL,
  `chromStart` int(10) unsigned NOT NULL,
  `chromEnd` int(10) unsigned NOT NULL,
  `name` varchar(15) NOT NULL,
  `score` smallint(5) unsigned NOT NULL,
  `strand` enum('+','-') NOT NULL,
  `refNCBI` blob NOT NULL,
  `refUCSC` blob NOT NULL,
  `observed` varchar(255) NOT NULL,
  `molType` enum('unknown','genomic','cDNA') NOT NULL,
  `class` enum('unknown','single','in-del','microsatellite','named','mnp','insertion','deletion') NOT NULL,
  `valid` set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap','by-1000genomes') NOT NULL,
  `avHet` float NOT NULL,
  `avHetSE` float NOT NULL,
  `func` set('unknown','coding-synon','intron','near-gene-3','near-gene-5','ncRNA','nonsense','missense','stop-loss','frameshift','cds-indel','untranslated-3','untranslated-5','splice-3','splice-5') NOT NULL,
  `locType` enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion','fuzzy') NOT NULL,
  `weight` int(10) unsigned NOT NULL,
  `exceptions` set('RefAlleleMismatch','RefAlleleRevComp','DuplicateObserved','MixedObserved','FlankMismatchGenomeLonger','FlankMismatchGenomeEqual','FlankMismatchGenomeShorter','SingleClassLongerSpan','SingleClassZeroSpan','SingleClassTriAllelic','SingleClassQuadAllelic','ObservedWrongFormat','ObservedTooLong','ObservedContainsIupac','ObservedMismatch','MultipleAlignments','NonIntegerChromCount','AlleleFreqSumNot1','SingleAlleleFreq','InconsistentAlleles') NOT NULL,
  `submitterCount` smallint(5) unsigned NOT NULL,
  `submitters` longblob NOT NULL,
  `alleleFreqCount` smallint(5) unsigned NOT NULL,
  `alleles` longblob NOT NULL,
  `alleleNs` longblob NOT NULL,
  `alleleFreqs` longblob NOT NULL,
  `bitfields` set('clinically-assoc','maf-5-some-pop','maf-5-all-pops','has-omim-omia','microattr-tpa','submitted-by-lsdb','genotype-conflict','rs-cluster-nonoverlapping-alleles','observed-mismatch') NOT NULL,
  KEY `name` (`name`),
  KEY `chrom` (`chrom`,`bin`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
# Restore the client character set that was saved before the CREATE TABLE.
/*!40101 SET character_set_client = @saved_cs_client */;

# 3. Load the data into the table created above (`snp146`).
#    The delimiters are written as explicit escapes: tab between fields,
#    newline between records (presumably what the original ' ' literals were
#    before their backslash escapes were lost -- confirm against the file).
#    NOTE: a bare file name is resolved by the server, not the client; use an
#    absolute path if snp146.txt lives somewhere else.
LOAD DATA INFILE 'snp146.txt'
INTO TABLE `snp146`
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n';
这样就制作完成!
然而,随之而来的另一个问题是:snp146 数据库大约占用 20+GB 的硬盘空间,因此需要把 MySQL 的数据存储目录从默认位置迁移到空间更充裕的磁盘上。这个问题留待下文解决。