SVA project

This is my first time using SVA, for a recalibration project.

Project URL: Sequence Variant Analyzer

A sample .gsap file looks like the following:

############################################################
#SequenceVariantAnalyzer project file.
#Authors: Dongliang Ge & David B. Goldstein
#Duke Institute for Genome Sciences & Policy, Center for Human Genome Variation
#Note: lines after the commenting symbol (#) will be ignored in the analysis.
############################################################

#Output file
[OUTPUT]=/nfs/svaprojects/vcf/genome/vcf_mchd002A2_recal.sva

[PEDINFO]=/nfs/svaprojects/master.ped
[COLLECTNONCARRIERINFO]=OFF

#######################################################################
#The following section lists the inputs and outputs used in the annotation. You need to specify your files.
#######################################################################

#Whole-exome sequence samples#

[INPUT]=mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/var_flt_vcf.snp.filtered.vcf
[COVERAGE]=1,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.1.bco
[COVERAGE]=2,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.2.bco
[COVERAGE]=3,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.3.bco
[COVERAGE]=4,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.4.bco
[COVERAGE]=5,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.5.bco
[COVERAGE]=6,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.6.bco
[COVERAGE]=7,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.7.bco
[COVERAGE]=8,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.8.bco
[COVERAGE]=9,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.9.bco
[COVERAGE]=10,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.10.bco
[COVERAGE]=11,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.11.bco
[COVERAGE]=12,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.12.bco
[COVERAGE]=13,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.13.bco
[COVERAGE]=14,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.14.bco
[COVERAGE]=15,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.15.bco
[COVERAGE]=16,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.16.bco
[COVERAGE]=17,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.17.bco
[COVERAGE]=18,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.18.bco
[COVERAGE]=19,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.19.bco
[COVERAGE]=20,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.20.bco
[COVERAGE]=21,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.21.bco
[COVERAGE]=22,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.22.bco
[COVERAGE]=X,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.X.bco
[COVERAGE]=Y,mchd002A2,/nfs/svaprojects/vcf/genome/mchd002A2/bco/mchd002A2_.Y.bco

[HMMSVMINLOD]=0

###########################################################################
#The following section lists a genome buffering option. Enabling this option will speed up the loading of an
#integrated genome browser in this software. You will need to specify a file to store the buffered information.
###########################################################################

#Below is an example of a buffering file:
#[GenomeBrowserBuffer]=E:\projects\SequenceVariantAnalyzer\data\chr6\smallTest.gbb
[GenomeBrowserBufferSwitch]=on
[GenomeBrowserBuffer]=/local2/dg48/projects/sequencing/gbb/cns.gb2

##################################################################
#The following section lists the annotation options.
##################################################################

#If specified as "on", this option tells SequenceVariantAnalyzer to skip the annotation procedures
# and directly load the binary output instead.

[SKIPANNOTATION]=on

#Define upstream/downstream span
[UPSTREAM]=1000
[DOWNSTREAM]=1000
#Define intronic exon boundary. Intronic SNPs located within specified number of bases from
# exon boundary will fall into this category.
#This number has to be greater than 2.
[INTRON_EXON_BOUNDARY_INTO_INTRON]=8
[INTRON_EXON_BOUNDARY_INTO_EXON]=3
#Define threshold to group structural variations with recorded DGV CNVs - default is 50%.
[DGVCNVOVERLAP]=0.50
#Specify comprehensive or abbreviated annotation output. Note: comprehensive annotations include functionality for all relevant transcripts,
#while abbreviated output only includes the potentially most important one, but at a risk of losing information.
[DETAILED_OUTPUT]=on

############################################################################
#The following section lists the databases used in the annotation. You do not need to change them unless you
# need to use other versions of the databases.
############################################################################

#Reference database version
[COREVERSION]=50_36l

#Reference sequence, could be multiple lines, binary
[REFBIN]=1,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.1.fa.bin
[REFBIN]=2,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.2.fa.bin
[REFBIN]=3,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.3.fa.bin
[REFBIN]=4,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.4.fa.bin
[REFBIN]=5,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.5.fa.bin
[REFBIN]=6,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.6.fa.bin
[REFBIN]=7,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.7.fa.bin
[REFBIN]=8,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.8.fa.bin
[REFBIN]=9,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.9.fa.bin
[REFBIN]=10,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.10.fa.bin
[REFBIN]=11,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.11.fa.bin
[REFBIN]=12,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.12.fa.bin
[REFBIN]=13,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.13.fa.bin
[REFBIN]=14,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.14.fa.bin
[REFBIN]=15,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.15.fa.bin
[REFBIN]=16,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.16.fa.bin
[REFBIN]=17,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.17.fa.bin
[REFBIN]=18,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.18.fa.bin
[REFBIN]=19,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.19.fa.bin
[REFBIN]=20,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.20.fa.bin
[REFBIN]=21,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.21.fa.bin
[REFBIN]=22,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.22.fa.bin
[REFBIN]=X,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.X.fa.bin
[REFBIN]=Y,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.Y.fa.bin
[REFBIN]=MT,./datasource/ensembl/50_36l/Homo_sapiens.NCBI36.50.dna.chromosome.MT.fa.bin

#RepeatMasker result folder
[REPEATMASKERFOLDER]=./datasource/repeatMasker

#A switch to turn on/off the function for generating 'N' bases in the reference sequence
[REGENERATEREFNCALLS]=OFF

#The following are the annotation databases
#Core
[GENEID]=./datasource/ensembl/50_36l/gene_stable_id.txt
[TRANSCRIPT]=./datasource/ensembl/50_36l/transcript.txt
[TRANSCRIPTID]=./datasource/ensembl/50_36l/transcript_stable_id.txt
[TRANSLATION]=./datasource/ensembl/50_36l/translation.txt
[EXON]=./datasource/ensembl/50_36l/exon.txt
[EXONTRANSCRIPT]=./datasource/ensembl/50_36l/exon_transcript.txt
[XREF]=./datasource/ensembl/50_36l/xref.txt
[OBJECTXREF]=./datasource/ensembl/50_36l/object_xref.txt
[GENE]=./datasource/ensembl/50_36l/gene.txt
[SEQREGION]=./datasource/ensembl/50_36l/seq_region.txt
#GO
[GO]=./datasource/ensembl/50_36l/term.txt

#Existing variations
[VARIATION]=./datasource/ensembl/50_36l/variation.txt
[VARIATIONFEATURE1]=./datasource/ensembl/50_36l/variation_feature.001.txt
[VARIATIONFEATURE2]=./datasource/ensembl/50_36l/variation_feature.002.txt
[VARIATIONALLELE1]=./datasource/ensembl/50_36l/allele.001.txt
[VARIATIONALLELE2]=./datasource/ensembl/50_36l/allele.002.txt
[VARIATIONSAMPLE]=./datasource/ensembl/50_36l/sample.txt

#Protein sequence variation annotation (pre-calculated)
[PROTEINVAR]=./datasource/proteinpoly/mapp.out
#Hapmap
[HAPMAP]=./datasource/hapmap/hapmap_r23a.bim
#Illumina 1M chip
[ILLUMINA1M]=./datasource/illumina/Human1Mv1_snptable.txt
#CNV
[DGVCNV]=./datasource/CNV/DGV/variation.hg18.v3.txt
[CNVTAGGING]=./datasource/CNV/tagging/CNV_tagging_SNPs.txt
#KEGG pathway files
[KEGGMAPTITLE]=./datasource/kegg/Aug2508/map_title.tab
[KEGGGENEPATHWAY]=./datasource/kegg/Aug2508/genes_pathway.list
#Venter's variants
[VENTERSNPS]=./datasource/venter/HuRef.InternalHuRef-NCBI.snp.bin
[VENTERINDELS]=./datasource/venter/HuRef.InternalHuRef-NCBI.indel.bin
#1000 Human Genome SNP
[1000HUMANGENOMESNP]=./datasource/1kgenome/CEU.frequency.filter
#OMIM Gene map
[OMIMGENEMAP]=./datasource/omim/genemap

#An *optional* protein sequence output
#[PROTEINFASTAOUTPUTFOLDER]=./datasource/ensembl/50_36l/proteinfasta

#######################################################
#End of the SequenceVariantAnalyzer project script file.
#######################################################

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.